// stygian_charon/types.rs

use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};

5/// Target website classification for SLO thresholds.
6///
7/// Used to determine acceptable blocked ratios and risk assessments based on expected
8/// anti-bot posture. Different sites have different security requirements:
9///
10/// - **API**: Machine-to-machine communication; expects very low block ratio.
11/// - **`ContentSite`**: Public web content; moderate block tolerance.
12/// - **`HighSecurity`**: Banking, auth, sensitive data; higher block ratio acceptable.
13/// - **Unknown**: Default classification when unable to determine target type.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
15#[serde(rename_all = "snake_case")]
16pub enum TargetClass {
17    /// REST API or GraphQL endpoint; expect clean machine-to-machine paths.
18    Api,
19    /// General content site or e-commerce; browser-like requests expected.
20    ContentSite,
21    /// High-security property (banking, auth, sensitive data); strict anti-bot expected.
22    HighSecurity,
23    /// Unknown or unclassified target.
24    Unknown,
25}
26
27/// Blocked ratio service-level objectives (SLOs) by target class.
28///
29/// Defines acceptable and concerning block ratios for different target types.
30/// These thresholds guide requirement inference and risk scoring.
31#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
32pub struct BlockedRatioSlo {
33    /// Target class for these SLOs.
34    pub target_class: TargetClass,
35    /// Acceptable block ratio (green threshold); below this is normal.
36    pub acceptable: f64,
37    /// Warning threshold; above this triggers adaptive rate requirement.
38    pub warning: f64,
39    /// Critical threshold; above this indicates severe anti-bot posture.
40    pub critical: f64,
41}
42
43impl BlockedRatioSlo {
44    /// Default SLOs for API targets (0-5% blocks, 10% warning, 15% critical).
45    #[must_use]
46    pub const fn api() -> Self {
47        Self {
48            target_class: TargetClass::Api,
49            acceptable: 0.05,
50            warning: 0.10,
51            critical: 0.15,
52        }
53    }
54
55    /// Default SLOs for content sites (0-15% blocks, 25% warning, 40% critical).
56    #[must_use]
57    pub const fn content_site() -> Self {
58        Self {
59            target_class: TargetClass::ContentSite,
60            acceptable: 0.15,
61            warning: 0.25,
62            critical: 0.40,
63        }
64    }
65
66    /// Default SLOs for high-security sites (0-30% blocks, 50% warning, 70% critical).
67    #[must_use]
68    pub const fn high_security() -> Self {
69        Self {
70            target_class: TargetClass::HighSecurity,
71            acceptable: 0.30,
72            warning: 0.50,
73            critical: 0.70,
74        }
75    }
76
77    /// Default SLOs for unknown targets (conservative: API thresholds).
78    #[must_use]
79    pub const fn unknown() -> Self {
80        Self {
81            target_class: TargetClass::Unknown,
82            acceptable: 0.05, // Same as API
83            warning: 0.10,
84            critical: 0.15,
85        }
86    }
87
88    /// Get SLO for a target class.
89    #[must_use]
90    pub const fn for_class(class: TargetClass) -> Self {
91        match class {
92            TargetClass::Api => Self::api(),
93            TargetClass::ContentSite => Self::content_site(),
94            TargetClass::HighSecurity => Self::high_security(),
95            TargetClass::Unknown => Self::unknown(),
96        }
97    }
98
99    /// Assess blocked ratio against SLO thresholds.
100    ///
101    /// Returns `(is_acceptable, is_warning, is_critical)`.
102    #[must_use]
103    pub fn assess(&self, blocked_ratio: f64) -> (bool, bool, bool) {
104        (
105            blocked_ratio <= self.acceptable,
106            blocked_ratio > self.acceptable && blocked_ratio <= self.warning,
107            blocked_ratio > self.critical,
108        )
109    }
110}
111
112/// A simplified view of one HTTP transaction used for provider classification.
113#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
114pub struct TransactionView {
115    /// Request URL.
116    pub url: String,
117    /// HTTP status code.
118    pub status: u16,
119    /// Response headers (lower/upper case are normalized by the classifier).
120    pub response_headers: BTreeMap<String, String>,
121    /// Optional response body snippet.
122    pub response_body_snippet: Option<String>,
123}
124
125/// Known anti-bot providers recognized by the classifier.
126#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
127pub enum AntiBotProvider {
128    /// `DataDome`.
129    DataDome,
130    /// Cloudflare bot/challenge stack.
131    Cloudflare,
132    /// Akamai bot manager indicators.
133    Akamai,
134    /// Human Security / `PerimeterX` indicators.
135    PerimeterX,
136    /// Kasada indicators.
137    Kasada,
138    /// Fingerprint.com markers.
139    FingerprintCom,
140    /// Catch-all when no provider-specific signatures were found.
141    Unknown,
142}
143
144/// Classification result with evidence markers.
145#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
146pub struct Detection {
147    /// Most likely provider.
148    pub provider: AntiBotProvider,
149    /// Simple confidence score in [0.0, 1.0].
150    pub confidence: f64,
151    /// Marker strings that matched.
152    pub markers: Vec<String>,
153}
154
155/// Scorecard for one provider.
156#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
157pub struct ProviderScore {
158    /// Provider represented by this score.
159    pub provider: AntiBotProvider,
160    /// Weighted score from marker matches.
161    pub score: u32,
162    /// Evidence used to produce the score.
163    pub markers: Vec<String>,
164}
165
166/// Minimal per-request summary extracted from a HAR file.
167#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
168pub struct HarRequestSummary {
169    /// URL requested.
170    pub url: String,
171    /// HTTP status code.
172    pub status: u16,
173    /// Best-effort resource type from HAR metadata.
174    pub resource_type: Option<String>,
175    /// Detection result for this request.
176    pub detection: Detection,
177}
178
179/// Full HAR classification report.
180#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
181pub struct HarClassificationReport {
182    /// URL/title from HAR page metadata when available.
183    pub page_title: Option<String>,
184    /// Summary classification for all entries.
185    pub aggregate: Detection,
186    /// Request-level classification outputs.
187    pub requests: Vec<HarRequestSummary>,
188}
189
190/// Frequency count for a normalized marker string.
191#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
192pub struct MarkerCount {
193    /// Marker text.
194    pub marker: String,
195    /// Number of requests where the marker appears.
196    pub count: u64,
197}
198
199/// Aggregated request metrics per host.
200#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
201pub struct HostSummary {
202    /// Hostname extracted from request URL.
203    pub host: String,
204    /// Total requests observed for this host.
205    pub total_requests: u64,
206    /// Requests that returned HTTP 403 or 429.
207    pub blocked_requests: u64,
208}
209
210/// Full-featured HAR investigation output suitable for diffs and alerting.
211#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
212pub struct InvestigationReport {
213    /// URL/title from HAR page metadata when available.
214    pub page_title: Option<String>,
215    /// Total requests in the capture.
216    pub total_requests: u64,
217    /// Count of blocked/challenged requests (403/429).
218    pub blocked_requests: u64,
219    /// Status-code histogram.
220    pub status_histogram: BTreeMap<u16, u64>,
221    /// Resource-type histogram from HAR metadata.
222    pub resource_type_histogram: BTreeMap<String, u64>,
223    /// Provider histogram inferred from signatures.
224    pub provider_histogram: BTreeMap<AntiBotProvider, u64>,
225    /// Full marker histogram inferred from signatures.
226    pub marker_histogram: BTreeMap<String, u64>,
227    /// Most frequent signature markers.
228    pub top_markers: Vec<MarkerCount>,
229    /// Top hosts by request volume.
230    pub hosts: Vec<HostSummary>,
231    /// Suspicious requests (blocked/challenged or with known provider markers).
232    pub suspicious_requests: Vec<HarRequestSummary>,
233    /// Aggregate provider classification.
234    pub aggregate: Detection,
235    /// Target website class for SLO assessment (optional; defaults to Unknown).
236    #[serde(default)]
237    pub target_class: Option<TargetClass>,
238}
239
240/// Delta between a baseline report and a candidate report.
241#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
242pub struct InvestigationDiff {
243    /// Baseline request count.
244    pub baseline_total_requests: u64,
245    /// Candidate request count.
246    pub candidate_total_requests: u64,
247    /// Baseline blocked requests.
248    pub baseline_blocked_requests: u64,
249    /// Candidate blocked requests.
250    pub candidate_blocked_requests: u64,
251    /// Candidate blocked ratio minus baseline blocked ratio.
252    pub blocked_ratio_delta: f64,
253    /// Whether blocked ratio increased by at least 2 percentage points.
254    pub likely_regression: bool,
255    /// Provider count delta: candidate minus baseline.
256    pub provider_delta: BTreeMap<AntiBotProvider, i64>,
257    /// New markers observed in candidate but not baseline.
258    pub new_markers: Vec<String>,
259}
260
261/// Severity/importance level for an inferred operational requirement.
262#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
263pub enum RequirementLevel {
264    /// Helpful, but usually not mandatory.
265    Low,
266    /// Strongly recommended for reliable automation.
267    Medium,
268    /// Typically required to avoid frequent blocks/challenges.
269    High,
270}
271
272/// One inferred operational requirement derived from telemetry.
273#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
274pub struct AntiBotRequirement {
275    /// Stable identifier for the requirement.
276    pub id: String,
277    /// Human-friendly requirement title.
278    pub title: String,
279    /// Why this requirement appears to matter.
280    pub why: String,
281    /// Marker evidence or metrics supporting the inference.
282    pub evidence: Vec<String>,
283    /// Estimated requirement importance.
284    pub level: RequirementLevel,
285}
286
287/// High-level integration strategy for Stygian execution.
288#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
289pub enum AdapterStrategy {
290    /// Standard HTTP adapter path appears sufficient.
291    DirectHttp,
292    /// Browser-backed execution is recommended.
293    BrowserStealth,
294    /// Sticky session + proxy continuity should be applied.
295    StickyProxy,
296    /// Warm-up/session priming before data collection is advised.
297    SessionWarmup,
298    /// Unknown/ambiguous conditions: keep in investigation mode.
299    InvestigateOnly,
300}
301
302/// Suggested Stygian integration plan derived from investigation signals.
303#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
304pub struct IntegrationRecommendation {
305    /// Selected strategy.
306    pub strategy: AdapterStrategy,
307    /// Why this strategy was selected.
308    pub rationale: String,
309    /// Suggested feature flags/components for Stygian wiring.
310    pub required_stygian_features: Vec<String>,
311    /// Suggested runtime configuration hints.
312    pub config_hints: BTreeMap<String, String>,
313}
314
315/// Provider-aware operational profile and integration guidance.
316#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
317pub struct RequirementsProfile {
318    /// Aggregate inferred provider.
319    pub provider: AntiBotProvider,
320    /// Confidence for the provider assignment.
321    pub confidence: f64,
322    /// Inferred operational requirements.
323    pub requirements: Vec<AntiBotRequirement>,
324    /// Suggested Stygian integration strategy.
325    pub recommendation: IntegrationRecommendation,
326}
327
328/// High-level execution mode for a target.
329#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
330#[serde(rename_all = "snake_case")]
331pub enum ExecutionMode {
332    /// Standard HTTP adapters.
333    Http,
334    /// Browser-backed execution.
335    Browser,
336}
337
338/// Session persistence mode.
339#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
340#[serde(rename_all = "snake_case")]
341pub enum SessionMode {
342    /// No explicit session persistence.
343    Stateless,
344    /// Reuse a sticky proxy/session identity.
345    Sticky,
346}
347
348/// Recommended anti-bot telemetry level.
349#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
350#[serde(rename_all = "snake_case")]
351pub enum TelemetryLevel {
352    /// Minimal telemetry.
353    Basic,
354    /// Normal diagnostics.
355    Standard,
356    /// Deep diagnostics and marker tracking.
357    Deep,
358}
359
360/// Concrete runtime policy that can be mapped to Stygian config.
361#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
362pub struct RuntimePolicy {
363    /// Recommended execution mode.
364    pub execution_mode: ExecutionMode,
365    /// Recommended session mode.
366    pub session_mode: SessionMode,
367    /// Recommended telemetry level.
368    pub telemetry_level: TelemetryLevel,
369    /// Requests per second budget.
370    pub rate_limit_rps: f64,
371    /// Max retries per request.
372    pub max_retries: u32,
373    /// Baseline backoff in milliseconds.
374    pub backoff_base_ms: u64,
375    /// Whether warm-up navigation/requests are recommended.
376    pub enable_warmup: bool,
377    /// Whether browser context should block WebRTC non-proxied paths.
378    pub enforce_webrtc_proxy_only: bool,
379    /// Suggested sticky-session TTL in seconds (if relevant).
380    pub sticky_session_ttl_secs: Option<u64>,
381    /// Required Stygian features/components.
382    pub required_stygian_features: Vec<String>,
383    /// Additional hints mapped by key.
384    pub config_hints: BTreeMap<String, String>,
385    /// Composite risk score in [0.0, 1.0].
386    pub risk_score: f64,
387}
388
389/// End-to-end result from HAR analysis, requirements inference, and policy planning.
390#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
391pub struct InvestigationBundle {
392    /// Parsed/aggregated investigation report.
393    pub report: InvestigationReport,
394    /// Inferred requirements profile.
395    pub requirements: RequirementsProfile,
396    /// Planned runtime policy.
397    pub policy: RuntimePolicy,
398}
399
#[cfg(test)]
// Panicking is the intended failure signal inside tests, so the panic-related
// lints are relaxed for this module only.
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use std::collections::HashSet;

    use super::*;

    #[test]
    fn test_blocked_ratio_slo_api_thresholds() {
        let slo = BlockedRatioSlo::api();
        assert_eq!(slo.target_class, TargetClass::Api);
        assert!((slo.acceptable - 0.05).abs() < f64::EPSILON);
        assert!((slo.warning - 0.10).abs() < f64::EPSILON);
        assert!((slo.critical - 0.15).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_content_site_thresholds() {
        let slo = BlockedRatioSlo::content_site();
        assert_eq!(slo.target_class, TargetClass::ContentSite);
        assert!((slo.acceptable - 0.15).abs() < f64::EPSILON);
        assert!((slo.warning - 0.25).abs() < f64::EPSILON);
        assert!((slo.critical - 0.40).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_high_security_thresholds() {
        let slo = BlockedRatioSlo::high_security();
        assert_eq!(slo.target_class, TargetClass::HighSecurity);
        assert!((slo.acceptable - 0.30).abs() < f64::EPSILON);
        assert!((slo.warning - 0.50).abs() < f64::EPSILON);
        assert!((slo.critical - 0.70).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_unknown_defaults_to_api() {
        let slo = BlockedRatioSlo::unknown();
        assert_eq!(slo.target_class, TargetClass::Unknown);
        assert!((slo.acceptable - 0.05).abs() < f64::EPSILON); // Same thresholds as API
        assert!((slo.warning - 0.10).abs() < f64::EPSILON);
        assert!((slo.critical - 0.15).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_for_class_api() {
        let slo = BlockedRatioSlo::for_class(TargetClass::Api);
        assert_eq!(slo.target_class, TargetClass::Api);
        assert!((slo.acceptable - 0.05).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_for_class_content_site() {
        let slo = BlockedRatioSlo::for_class(TargetClass::ContentSite);
        assert_eq!(slo.target_class, TargetClass::ContentSite);
        assert!((slo.acceptable - 0.15).abs() < f64::EPSILON);
    }

    #[test]
    fn test_blocked_ratio_slo_for_class_high_security() {
        let slo = BlockedRatioSlo::for_class(TargetClass::HighSecurity);
        assert_eq!(slo, BlockedRatioSlo::high_security());
    }

    #[test]
    fn test_blocked_ratio_slo_for_class_unknown() {
        let slo = BlockedRatioSlo::for_class(TargetClass::Unknown);
        assert_eq!(slo, BlockedRatioSlo::unknown());
    }

    #[test]
    fn test_blocked_ratio_slo_assess_below_acceptable() {
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.02);
        assert!(acceptable);
        assert!(!warning);
        assert!(!critical);
    }

    #[test]
    fn test_blocked_ratio_slo_assess_at_acceptable() {
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.05);
        assert!(acceptable);
        assert!(!warning);
        assert!(!critical);
    }

    #[test]
    fn test_blocked_ratio_slo_assess_in_warning_zone() {
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.075);
        assert!(!acceptable);
        assert!(warning);
        assert!(!critical);
    }

    #[test]
    fn test_blocked_ratio_slo_assess_at_warning() {
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.10);
        // At exactly 0.10 (warning threshold), warning should be true
        // because warning is true when > acceptable && <= warning
        assert!(!acceptable);
        assert!(warning); // 0.10 is in the warning zone (0.05 < 0.10 <= 0.10)
        assert!(!critical);
    }

    #[test]
    fn test_blocked_ratio_slo_assess_between_warning_and_critical() {
        // Ratios in (warning, critical] deliberately set none of the three flags.
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.125);
        assert!(!acceptable);
        assert!(!warning);
        assert!(!critical);
    }

    #[test]
    fn test_blocked_ratio_slo_assess_above_critical() {
        let slo = BlockedRatioSlo::api();
        let (acceptable, warning, critical) = slo.assess(0.20);
        assert!(!acceptable);
        assert!(!warning);
        assert!(critical);
    }

    #[test]
    fn test_blocked_ratio_slo_content_site_assessment() {
        let slo = BlockedRatioSlo::content_site();

        // Below acceptable (green)
        let (acc, warn, crit) = slo.assess(0.10);
        assert!(acc && !warn && !crit);

        // In warning zone (yellow): 0.15 < 0.20 <= 0.25
        let (acc, warn, crit) = slo.assess(0.20);
        assert!(!acc && warn && !crit);

        // In critical zone: 0.45 > 0.40
        let (acc, warn, crit) = slo.assess(0.45);
        assert!(!acc && !warn && crit);

        // Exactly at critical threshold
        let (acc, warn, crit) = slo.assess(0.40);
        assert!(!acc && !warn && !crit); // Exactly at threshold is not > critical
    }

    #[test]
    fn test_target_class_derives() {
        // Verify that TargetClass can be compared and hashed
        let api1 = TargetClass::Api;
        let api2 = TargetClass::Api;
        let content = TargetClass::ContentSite;

        assert_eq!(api1, api2);
        assert_ne!(api1, content);

        // Equal values must collapse to one entry when hashed into a set.
        let unique: HashSet<TargetClass> = [api1, api2, content].iter().copied().collect();
        assert_eq!(unique.len(), 2);
    }

    #[test]
    fn test_blocked_ratio_slo_serialization() {
        // A (de)serialization failure must fail the test rather than be skipped.
        let slo = BlockedRatioSlo::content_site();
        let json = serde_json::to_string(&slo).expect("BlockedRatioSlo should serialize to JSON");
        let deserialized: BlockedRatioSlo =
            serde_json::from_str(&json).expect("serialized BlockedRatioSlo should deserialize");
        assert_eq!(slo, deserialized);
    }

    #[test]
    fn test_target_class_serialization() {
        let target = TargetClass::HighSecurity;
        let json = serde_json::to_string(&target).expect("TargetClass should serialize to JSON");
        // `rename_all = "snake_case"` fixes the wire representation.
        assert_eq!(json, "\"high_security\"");
        let deserialized: TargetClass =
            serde_json::from_str(&json).expect("serialized TargetClass should deserialize");
        assert_eq!(target, deserialized);
    }
}