// stygian_charon/types.rs

1use std::collections::BTreeMap;
2
3use serde::{Deserialize, Serialize};
4
5/// Target website classification for SLO thresholds.
6///
7/// Used to determine acceptable blocked ratios and risk assessments based on expected
8/// anti-bot posture. Different sites have different security requirements:
9///
10/// - **API**: Machine-to-machine communication; expects very low block ratio.
11/// - **`ContentSite`**: Public web content; moderate block tolerance.
12/// - **`HighSecurity`**: Banking, auth, sensitive data; higher block ratio acceptable.
13/// - **Unknown**: Default classification when unable to determine target type.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
15pub enum TargetClass {
16    /// REST API or GraphQL endpoint; expect clean machine-to-machine paths.
17    Api,
18    /// General content site or e-commerce; browser-like requests expected.
19    ContentSite,
20    /// High-security property (banking, auth, sensitive data); strict anti-bot expected.
21    HighSecurity,
22    /// Unknown or unclassified target.
23    Unknown,
24}
25
26/// Blocked ratio service-level objectives (SLOs) by target class.
27///
28/// Defines acceptable and concerning block ratios for different target types.
29/// These thresholds guide requirement inference and risk scoring.
30#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
31pub struct BlockedRatioSlo {
32    /// Target class for these SLOs.
33    pub target_class: TargetClass,
34    /// Acceptable block ratio (green threshold); below this is normal.
35    pub acceptable: f64,
36    /// Warning threshold; above this triggers adaptive rate requirement.
37    pub warning: f64,
38    /// Critical threshold; above this indicates severe anti-bot posture.
39    pub critical: f64,
40}
41
42impl BlockedRatioSlo {
43    /// Default SLOs for API targets (0-5% blocks, 10% warning, 15% critical).
44    #[must_use]
45    pub const fn api() -> Self {
46        Self {
47            target_class: TargetClass::Api,
48            acceptable: 0.05,
49            warning: 0.10,
50            critical: 0.15,
51        }
52    }
53
54    /// Default SLOs for content sites (0-15% blocks, 25% warning, 40% critical).
55    #[must_use]
56    pub const fn content_site() -> Self {
57        Self {
58            target_class: TargetClass::ContentSite,
59            acceptable: 0.15,
60            warning: 0.25,
61            critical: 0.40,
62        }
63    }
64
65    /// Default SLOs for high-security sites (0-30% blocks, 50% warning, 70% critical).
66    #[must_use]
67    pub const fn high_security() -> Self {
68        Self {
69            target_class: TargetClass::HighSecurity,
70            acceptable: 0.30,
71            warning: 0.50,
72            critical: 0.70,
73        }
74    }
75
76    /// Default SLOs for unknown targets (conservative: API thresholds).
77    #[must_use]
78    pub const fn unknown() -> Self {
79        Self {
80            target_class: TargetClass::Unknown,
81            acceptable: 0.05, // Same as API
82            warning: 0.10,
83            critical: 0.15,
84        }
85    }
86
87    /// Get SLO for a target class.
88    #[must_use]
89    pub const fn for_class(class: TargetClass) -> Self {
90        match class {
91            TargetClass::Api => Self::api(),
92            TargetClass::ContentSite => Self::content_site(),
93            TargetClass::HighSecurity => Self::high_security(),
94            TargetClass::Unknown => Self::unknown(),
95        }
96    }
97
98    /// Assess blocked ratio against SLO thresholds.
99    ///
100    /// Returns `(is_acceptable, is_warning, is_critical)`.
101    #[must_use]
102    pub fn assess(&self, blocked_ratio: f64) -> (bool, bool, bool) {
103        (
104            blocked_ratio <= self.acceptable,
105            blocked_ratio > self.acceptable && blocked_ratio <= self.warning,
106            blocked_ratio > self.critical,
107        )
108    }
109}
110
111/// A simplified view of one HTTP transaction used for provider classification.
112#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
113pub struct TransactionView {
114    /// Request URL.
115    pub url: String,
116    /// HTTP status code.
117    pub status: u16,
118    /// Response headers (lower/upper case are normalized by the classifier).
119    pub response_headers: BTreeMap<String, String>,
120    /// Optional response body snippet.
121    pub response_body_snippet: Option<String>,
122}
123
124/// Known anti-bot providers recognized by the classifier.
125#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
126pub enum AntiBotProvider {
127    /// `DataDome`.
128    DataDome,
129    /// Cloudflare bot/challenge stack.
130    Cloudflare,
131    /// Akamai bot manager indicators.
132    Akamai,
133    /// Human Security / `PerimeterX` indicators.
134    PerimeterX,
135    /// Kasada indicators.
136    Kasada,
137    /// Fingerprint.com markers.
138    FingerprintCom,
139    /// Catch-all when no provider-specific signatures were found.
140    Unknown,
141}
142
143/// Classification result with evidence markers.
144#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
145pub struct Detection {
146    /// Most likely provider.
147    pub provider: AntiBotProvider,
148    /// Simple confidence score in [0.0, 1.0].
149    pub confidence: f64,
150    /// Marker strings that matched.
151    pub markers: Vec<String>,
152}
153
154/// Scorecard for one provider.
155#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
156pub struct ProviderScore {
157    /// Provider represented by this score.
158    pub provider: AntiBotProvider,
159    /// Weighted score from marker matches.
160    pub score: u32,
161    /// Evidence used to produce the score.
162    pub markers: Vec<String>,
163}
164
165/// Minimal per-request summary extracted from a HAR file.
166#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
167pub struct HarRequestSummary {
168    /// URL requested.
169    pub url: String,
170    /// HTTP status code.
171    pub status: u16,
172    /// Best-effort resource type from HAR metadata.
173    pub resource_type: Option<String>,
174    /// Detection result for this request.
175    pub detection: Detection,
176}
177
178/// Full HAR classification report.
179#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
180pub struct HarClassificationReport {
181    /// URL/title from HAR page metadata when available.
182    pub page_title: Option<String>,
183    /// Summary classification for all entries.
184    pub aggregate: Detection,
185    /// Request-level classification outputs.
186    pub requests: Vec<HarRequestSummary>,
187}
188
189/// Frequency count for a normalized marker string.
190#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
191pub struct MarkerCount {
192    /// Marker text.
193    pub marker: String,
194    /// Number of requests where the marker appears.
195    pub count: u64,
196}
197
198/// Aggregated request metrics per host.
199#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
200pub struct HostSummary {
201    /// Hostname extracted from request URL.
202    pub host: String,
203    /// Total requests observed for this host.
204    pub total_requests: u64,
205    /// Requests that returned HTTP 403 or 429.
206    pub blocked_requests: u64,
207}
208
209/// Full-featured HAR investigation output suitable for diffs and alerting.
210#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
211pub struct InvestigationReport {
212    /// URL/title from HAR page metadata when available.
213    pub page_title: Option<String>,
214    /// Total requests in the capture.
215    pub total_requests: u64,
216    /// Count of blocked/challenged requests (403/429).
217    pub blocked_requests: u64,
218    /// Status-code histogram.
219    pub status_histogram: BTreeMap<u16, u64>,
220    /// Resource-type histogram from HAR metadata.
221    pub resource_type_histogram: BTreeMap<String, u64>,
222    /// Provider histogram inferred from signatures.
223    pub provider_histogram: BTreeMap<AntiBotProvider, u64>,
224    /// Full marker histogram inferred from signatures.
225    pub marker_histogram: BTreeMap<String, u64>,
226    /// Most frequent signature markers.
227    pub top_markers: Vec<MarkerCount>,
228    /// Top hosts by request volume.
229    pub hosts: Vec<HostSummary>,
230    /// Suspicious requests (blocked/challenged or with known provider markers).
231    pub suspicious_requests: Vec<HarRequestSummary>,
232    /// Aggregate provider classification.
233    pub aggregate: Detection,
234    /// Target website class for SLO assessment (optional; defaults to Unknown).
235    #[serde(default)]
236    pub target_class: Option<TargetClass>,
237}
238
239/// Delta between a baseline report and a candidate report.
240#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
241pub struct InvestigationDiff {
242    /// Baseline request count.
243    pub baseline_total_requests: u64,
244    /// Candidate request count.
245    pub candidate_total_requests: u64,
246    /// Baseline blocked requests.
247    pub baseline_blocked_requests: u64,
248    /// Candidate blocked requests.
249    pub candidate_blocked_requests: u64,
250    /// Candidate blocked ratio minus baseline blocked ratio.
251    pub blocked_ratio_delta: f64,
252    /// Whether blocked ratio increased by at least 2 percentage points.
253    pub likely_regression: bool,
254    /// Provider count delta: candidate minus baseline.
255    pub provider_delta: BTreeMap<AntiBotProvider, i64>,
256    /// New markers observed in candidate but not baseline.
257    pub new_markers: Vec<String>,
258}
259
260/// Severity/importance level for an inferred operational requirement.
261#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
262pub enum RequirementLevel {
263    /// Helpful, but usually not mandatory.
264    Low,
265    /// Strongly recommended for reliable automation.
266    Medium,
267    /// Typically required to avoid frequent blocks/challenges.
268    High,
269}
270
271/// One inferred operational requirement derived from telemetry.
272#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
273pub struct AntiBotRequirement {
274    /// Stable identifier for the requirement.
275    pub id: String,
276    /// Human-friendly requirement title.
277    pub title: String,
278    /// Why this requirement appears to matter.
279    pub why: String,
280    /// Marker evidence or metrics supporting the inference.
281    pub evidence: Vec<String>,
282    /// Estimated requirement importance.
283    pub level: RequirementLevel,
284}
285
286/// High-level integration strategy for Stygian execution.
287#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
288pub enum AdapterStrategy {
289    /// Standard HTTP adapter path appears sufficient.
290    DirectHttp,
291    /// Browser-backed execution is recommended.
292    BrowserStealth,
293    /// Sticky session + proxy continuity should be applied.
294    StickyProxy,
295    /// Warm-up/session priming before data collection is advised.
296    SessionWarmup,
297    /// Unknown/ambiguous conditions: keep in investigation mode.
298    InvestigateOnly,
299}
300
301/// Suggested Stygian integration plan derived from investigation signals.
302#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
303pub struct IntegrationRecommendation {
304    /// Selected strategy.
305    pub strategy: AdapterStrategy,
306    /// Why this strategy was selected.
307    pub rationale: String,
308    /// Suggested feature flags/components for Stygian wiring.
309    pub required_stygian_features: Vec<String>,
310    /// Suggested runtime configuration hints.
311    pub config_hints: BTreeMap<String, String>,
312}
313
314/// Provider-aware operational profile and integration guidance.
315#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
316pub struct RequirementsProfile {
317    /// Aggregate inferred provider.
318    pub provider: AntiBotProvider,
319    /// Confidence for the provider assignment.
320    pub confidence: f64,
321    /// Inferred operational requirements.
322    pub requirements: Vec<AntiBotRequirement>,
323    /// Suggested Stygian integration strategy.
324    pub recommendation: IntegrationRecommendation,
325}
326
327/// High-level execution mode for a target.
328#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
329pub enum ExecutionMode {
330    /// Standard HTTP adapters.
331    Http,
332    /// Browser-backed execution.
333    Browser,
334}
335
336/// Session persistence mode.
337#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
338pub enum SessionMode {
339    /// No explicit session persistence.
340    Stateless,
341    /// Reuse a sticky proxy/session identity.
342    Sticky,
343}
344
345/// Recommended anti-bot telemetry level.
346#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
347pub enum TelemetryLevel {
348    /// Minimal telemetry.
349    Basic,
350    /// Normal diagnostics.
351    Standard,
352    /// Deep diagnostics and marker tracking.
353    Deep,
354}
355
356/// Concrete runtime policy that can be mapped to Stygian config.
357#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
358pub struct RuntimePolicy {
359    /// Recommended execution mode.
360    pub execution_mode: ExecutionMode,
361    /// Recommended session mode.
362    pub session_mode: SessionMode,
363    /// Recommended telemetry level.
364    pub telemetry_level: TelemetryLevel,
365    /// Requests per second budget.
366    pub rate_limit_rps: f64,
367    /// Max retries per request.
368    pub max_retries: u32,
369    /// Baseline backoff in milliseconds.
370    pub backoff_base_ms: u64,
371    /// Whether warm-up navigation/requests are recommended.
372    pub enable_warmup: bool,
373    /// Whether browser context should block WebRTC non-proxied paths.
374    pub enforce_webrtc_proxy_only: bool,
375    /// Suggested sticky-session TTL in seconds (if relevant).
376    pub sticky_session_ttl_secs: Option<u64>,
377    /// Required Stygian features/components.
378    pub required_stygian_features: Vec<String>,
379    /// Additional hints mapped by key.
380    pub config_hints: BTreeMap<String, String>,
381    /// Composite risk score in [0.0, 1.0].
382    pub risk_score: f64,
383}
384
385/// End-to-end result from HAR analysis, requirements inference, and policy planning.
386#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
387pub struct InvestigationBundle {
388    /// Parsed/aggregated investigation report.
389    pub report: InvestigationReport,
390    /// Inferred requirements profile.
391    pub requirements: RequirementsProfile,
392    /// Planned runtime policy.
393    pub policy: RuntimePolicy,
394}
395
/// Unit tests for the SLO presets, threshold assessment, and serde round-trips.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_blocked_ratio_slo_api_thresholds() {
        let slo = BlockedRatioSlo::api();
        assert_eq!(slo.target_class, TargetClass::Api);
        assert!((slo.acceptable - 0.05).abs() < f64::EPSILON);
        assert!((slo.warning - 0.10).abs() < f64::EPSILON);
        assert!((slo.critical - 0.15).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_content_site_thresholds() {
        let slo = BlockedRatioSlo::content_site();
        assert_eq!(slo.target_class, TargetClass::ContentSite);
        assert!((slo.acceptable - 0.15).abs() < f64::EPSILON);
        assert!((slo.warning - 0.25).abs() < f64::EPSILON);
        assert!((slo.critical - 0.40).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_high_security_thresholds() {
        let slo = BlockedRatioSlo::high_security();
        assert_eq!(slo.target_class, TargetClass::HighSecurity);
        assert!((slo.acceptable - 0.30).abs() < f64::EPSILON);
        assert!((slo.warning - 0.50).abs() < f64::EPSILON);
        assert!((slo.critical - 0.70).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_unknown_defaults_to_api() {
        let slo = BlockedRatioSlo::unknown();
        assert_eq!(slo.target_class, TargetClass::Unknown);
        assert!((slo.acceptable - 0.05).abs() < f64::EPSILON); // Same thresholds as API
        assert!((slo.warning - 0.10).abs() < f64::EPSILON);
        assert!((slo.critical - 0.15).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_for_class_api() {
        let slo = BlockedRatioSlo::for_class(TargetClass::Api);
        assert_eq!(slo.target_class, TargetClass::Api);
        assert!((slo.acceptable - 0.05).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_for_class_content_site() {
        let slo = BlockedRatioSlo::for_class(TargetClass::ContentSite);
        assert_eq!(slo.target_class, TargetClass::ContentSite);
        assert!((slo.acceptable - 0.15).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_for_class_high_security() {
        let slo = BlockedRatioSlo::for_class(TargetClass::HighSecurity);
        assert_eq!(slo.target_class, TargetClass::HighSecurity);
        assert!((slo.acceptable - 0.30).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_for_class_unknown() {
        let slo = BlockedRatioSlo::for_class(TargetClass::Unknown);
        assert_eq!(slo.target_class, TargetClass::Unknown);
        assert!((slo.acceptable - 0.05).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_assess_below_acceptable() {
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.02);
        assert!(acceptable);
        assert!(!warning);
        assert!(!critical);
    }

    #[test]
    fn test_blocked_ratio_slo_assess_at_acceptable() {
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.05);
        assert!(acceptable);
        assert!(!warning);
        assert!(!critical);
    }

    #[test]
    fn test_blocked_ratio_slo_assess_in_warning_zone() {
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.075);
        assert!(!acceptable);
        assert!(warning);
        assert!(!critical);
    }

    #[test]
    fn test_blocked_ratio_slo_assess_at_warning() {
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.10);
        // At exactly 0.10 (warning threshold), warning should be true
        // because warning is true when > acceptable && <= warning
        assert!(!acceptable);
        assert!(warning); // 0.10 is in the warning zone (0.05 < 0.10 <= 0.10)
        assert!(!critical);
    }

    #[test]
    fn test_blocked_ratio_slo_assess_between_warning_and_critical() {
        // Ratios in (warning, critical] deliberately set no flag at all.
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.125);
        assert!(!acceptable);
        assert!(!warning);
        assert!(!critical);
    }

    #[test]
    fn test_blocked_ratio_slo_assess_above_critical() {
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.20);
        assert!(!acceptable);
        assert!(!warning);
        assert!(critical);
    }

    #[test]
    fn test_blocked_ratio_slo_content_site_assessment() {
        let slo = BlockedRatioSlo::content_site();

        // Below acceptable (green)
        let (acc, warn, crit) = slo.assess(0.10);
        assert!(acc && !warn && !crit);

        // In warning zone (yellow): 0.15 < 0.20 <= 0.25
        let (acc, warn, crit) = slo.assess(0.20);
        assert!(!acc && warn && !crit);

        // In critical zone: 0.45 > 0.40
        let (acc, warn, crit) = slo.assess(0.45);
        assert!(!acc && !warn && crit);

        // Exactly at critical threshold
        let (acc, warn, crit) = slo.assess(0.40);
        assert!(!acc && !warn && !crit); // Exactly at threshold is not > critical
    }

    #[test]
    fn test_target_class_derives() {
        // Verify that TargetClass can be compared and hashed
        let api1 = TargetClass::Api;
        let api2 = TargetClass::Api;
        let content = TargetClass::ContentSite;

        assert_eq!(api1, api2);
        assert_ne!(api1, content);

        // Equal values must collapse to one set entry; distinct values must not.
        let mut seen = std::collections::HashSet::new();
        assert!(seen.insert(api1));
        assert!(!seen.insert(api2));
        assert!(seen.insert(content));
        assert_eq!(seen.len(), 2);
    }

    #[test]
    fn test_blocked_ratio_slo_serialization() {
        let slo = BlockedRatioSlo::content_site();
        // Surface any (de)serialization error in the assertion instead of
        // silently skipping the comparison.
        let round_trip = serde_json::to_string(&slo)
            .and_then(|json| serde_json::from_str::<BlockedRatioSlo>(&json))
            .map_err(|err| err.to_string());
        assert_eq!(round_trip, Ok(slo));
    }

    #[test]
    fn test_target_class_serialization() {
        let target = TargetClass::HighSecurity;
        // Surface any (de)serialization error in the assertion instead of
        // silently skipping the comparison.
        let round_trip = serde_json::to_string(&target)
            .and_then(|json| serde_json::from_str::<TargetClass>(&json))
            .map_err(|err| err.to_string());
        assert_eq!(round_trip, Ok(target));
    }
}