Skip to main content

stygian_charon/
analyzer.rs

1use serde::{Deserialize, Serialize};
2
3use crate::har;
4use crate::types::{Detection, HarClassificationReport, HarRequestSummary, TransactionView};
5
6/// Version identifier for Charon provider analyzers.
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
8pub enum AnalyzerVersion {
9    /// Current signature analyzer.
10    #[default]
11    V1,
12    /// Legacy signature analyzer retained for compatibility.
13    V1Legacy,
14}
15
16impl AnalyzerVersion {
17    /// Stable version id used in configs and reports.
18    #[must_use]
19    pub const fn id(self) -> &'static str {
20        match self {
21            Self::V1 => "v1",
22            Self::V1Legacy => "v1-legacy",
23        }
24    }
25
26    /// Parse a version id.
27    #[must_use]
28    pub fn parse(id: &str) -> Option<Self> {
29        match id {
30            "v1" => Some(Self::V1),
31            "v1-legacy" => Some(Self::V1Legacy),
32            _ => None,
33        }
34    }
35
36    /// Return `true` when this version is deprecated.
37    #[must_use]
38    pub const fn is_deprecated(self) -> bool {
39        matches!(self, Self::V1Legacy)
40    }
41
42    /// Recommended migration target for deprecated versions.
43    #[must_use]
44    pub const fn migration_target(self) -> Option<Self> {
45        match self {
46            Self::V1Legacy => Some(Self::V1),
47            Self::V1 => None,
48        }
49    }
50}
51
52/// Runtime profile selecting analyzer behavior.
53#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
54pub struct AnalyzerProfile {
55    /// Profile identifier for logging and policy wiring.
56    pub profile_id: String,
57    /// Analyzer version used for classification.
58    pub analyzer_version: AnalyzerVersion,
59}
60
61impl Default for AnalyzerProfile {
62    fn default() -> Self {
63        Self {
64            profile_id: "default".to_string(),
65            analyzer_version: AnalyzerVersion::V1,
66        }
67    }
68}
69
70/// Interface for versioned provider analyzers.
71pub trait ProviderAnalyzer {
72    /// Analyzer version identifier.
73    fn version(&self) -> AnalyzerVersion;
74
75    /// Classify one transaction into provider evidence.
76    fn classify_transaction(&self, tx: &TransactionView) -> Detection;
77
78    /// Classify all transactions in a HAR payload.
79    ///
80    /// # Errors
81    ///
82    /// Returns an error when HAR parsing fails.
83    fn classify_har(&self, har_json: &str) -> Result<HarClassificationReport, har::HarError> {
84        let parsed = har::parse_har_transactions(har_json)?;
85
86        let requests = parsed
87            .requests
88            .into_iter()
89            .map(|req| HarRequestSummary {
90                url: req.transaction.url.clone(),
91                status: req.transaction.status,
92                resource_type: req.resource_type,
93                detection: self.classify_transaction(&req.transaction),
94            })
95            .collect::<Vec<_>>();
96
97        Ok(HarClassificationReport {
98            page_title: parsed.page_title,
99            aggregate: aggregate_detection(&requests),
100            requests,
101        })
102    }
103}
104
105fn aggregate_detection(requests: &[HarRequestSummary]) -> Detection {
106    let mut provider_counts: std::collections::BTreeMap<crate::types::AntiBotProvider, u32> =
107        std::collections::BTreeMap::new();
108    let mut markers: Vec<String> = Vec::new();
109
110    for req in requests {
111        if req.detection.provider != crate::types::AntiBotProvider::Unknown {
112            let entry = provider_counts.entry(req.detection.provider).or_insert(0);
113            *entry = entry.saturating_add(1);
114        }
115        markers.extend(req.detection.markers.iter().cloned());
116    }
117
118    if provider_counts.is_empty() {
119        return Detection {
120            provider: crate::types::AntiBotProvider::Unknown,
121            confidence: 0.0,
122            markers: Vec::new(),
123        };
124    }
125
126    let mut ordered: Vec<(crate::types::AntiBotProvider, u32)> =
127        provider_counts.into_iter().collect();
128    ordered.sort_by_key(|(_, count)| std::cmp::Reverse(*count));
129
130    if let Some((provider, top_count)) = ordered.first().copied() {
131        let second_count = ordered.get(1).map_or(0, |x| x.1);
132        let confidence = if top_count + second_count == 0 {
133            0.0
134        } else {
135            f64::from(top_count) / f64::from(top_count + second_count)
136        };
137
138        Detection {
139            provider,
140            confidence,
141            markers,
142        }
143    } else {
144        Detection {
145            provider: crate::types::AntiBotProvider::Unknown,
146            confidence: 0.0,
147            markers: Vec::new(),
148        }
149    }
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    /// The legacy version is deprecated and points at `V1`; `V1` is neither
    /// deprecated nor has a migration target.
    #[test]
    fn analyzer_version_migration_path_is_defined_for_legacy() {
        assert!(AnalyzerVersion::V1Legacy.is_deprecated());
        assert_eq!(
            AnalyzerVersion::V1Legacy.migration_target(),
            Some(AnalyzerVersion::V1)
        );

        assert!(!AnalyzerVersion::V1.is_deprecated());
        assert_eq!(AnalyzerVersion::V1.migration_target(), None);
    }

    /// Every variant survives `id` -> `parse`, the ids are the documented
    /// strings, and unknown ids are rejected.
    #[test]
    fn analyzer_version_parse_roundtrip() {
        for version in [AnalyzerVersion::V1, AnalyzerVersion::V1Legacy] {
            assert_eq!(AnalyzerVersion::parse(version.id()), Some(version));
        }

        assert_eq!(AnalyzerVersion::V1.id(), "v1");
        assert_eq!(AnalyzerVersion::V1Legacy.id(), "v1-legacy");

        assert_eq!(AnalyzerVersion::parse("v2"), None);
        assert_eq!(AnalyzerVersion::parse(""), None);
    }
}