Skip to main content

stygian_charon/
classifier.rs

1use std::cmp::Reverse;
2use std::collections::BTreeMap;
3
4use crate::analyzer::{AnalyzerProfile, AnalyzerVersion, ProviderAnalyzer};
5use crate::har;
6use crate::types::{
7    AntiBotProvider, Detection, HarClassificationReport, ProviderScore, TransactionView,
8};
9
10#[derive(Debug, Clone, Copy)]
11struct Signature {
12    needle: &'static str,
13    provider: AntiBotProvider,
14    weight: u32,
15}
16
17const SIGNATURES: &[Signature] = &[
18    Signature {
19        needle: "x-datadome",
20        provider: AntiBotProvider::DataDome,
21        weight: 5,
22    },
23    Signature {
24        needle: "x-datadome-cid",
25        provider: AntiBotProvider::DataDome,
26        weight: 5,
27    },
28    Signature {
29        needle: "x-dd-b",
30        provider: AntiBotProvider::DataDome,
31        weight: 4,
32    },
33    Signature {
34        needle: "datadome=",
35        provider: AntiBotProvider::DataDome,
36        weight: 4,
37    },
38    Signature {
39        needle: "captcha-delivery.com",
40        provider: AntiBotProvider::DataDome,
41        weight: 4,
42    },
43    Signature {
44        needle: "server:cloudflare",
45        provider: AntiBotProvider::Cloudflare,
46        weight: 3,
47    },
48    Signature {
49        needle: "cf-ray",
50        provider: AntiBotProvider::Cloudflare,
51        weight: 5,
52    },
53    Signature {
54        needle: "__cf_bm",
55        provider: AntiBotProvider::Cloudflare,
56        weight: 4,
57    },
58    Signature {
59        needle: "cdn-cgi/challenge-platform",
60        provider: AntiBotProvider::Cloudflare,
61        weight: 4,
62    },
63    Signature {
64        needle: "attention required! | cloudflare",
65        provider: AntiBotProvider::Cloudflare,
66        weight: 4,
67    },
68    Signature {
69        needle: "_abck",
70        provider: AntiBotProvider::Akamai,
71        weight: 5,
72    },
73    Signature {
74        needle: "bm_sv",
75        provider: AntiBotProvider::Akamai,
76        weight: 5,
77    },
78    Signature {
79        needle: "akamai",
80        provider: AntiBotProvider::Akamai,
81        weight: 2,
82    },
83    Signature {
84        needle: "_px",
85        provider: AntiBotProvider::PerimeterX,
86        weight: 5,
87    },
88    Signature {
89        needle: "perimeterx",
90        provider: AntiBotProvider::PerimeterX,
91        weight: 4,
92    },
93    Signature {
94        needle: "humansecurity",
95        provider: AntiBotProvider::PerimeterX,
96        weight: 3,
97    },
98    Signature {
99        needle: "x-kpsdk",
100        provider: AntiBotProvider::Kasada,
101        weight: 5,
102    },
103    Signature {
104        needle: "kasada",
105        provider: AntiBotProvider::Kasada,
106        weight: 4,
107    },
108    Signature {
109        needle: "x-fpjs",
110        provider: AntiBotProvider::FingerprintCom,
111        weight: 4,
112    },
113    Signature {
114        needle: "fingerprint.com",
115        provider: AntiBotProvider::FingerprintCom,
116        weight: 3,
117    },
118];
119
120struct SignatureAnalyzer {
121    version: AnalyzerVersion,
122}
123
124impl ProviderAnalyzer for SignatureAnalyzer {
125    fn version(&self) -> AnalyzerVersion {
126        self.version
127    }
128
129    fn classify_transaction(&self, tx: &TransactionView) -> Detection {
130        match self.version {
131            AnalyzerVersion::V1 | AnalyzerVersion::V1Legacy => classify_transaction_v1(tx),
132        }
133    }
134}
135
136const fn select_analyzer(version: AnalyzerVersion) -> SignatureAnalyzer {
137    SignatureAnalyzer { version }
138}
139
140/// Classify a transaction view into a likely anti-bot provider.
141#[must_use]
142pub fn classify_transaction(tx: &TransactionView) -> Detection {
143    classify_transaction_with_profile(tx, &AnalyzerProfile::default())
144}
145
146/// Classify one transaction using the analyzer selected by profile.
147#[must_use]
148pub fn classify_transaction_with_profile(
149    tx: &TransactionView,
150    profile: &AnalyzerProfile,
151) -> Detection {
152    select_analyzer(profile.analyzer_version).classify_transaction(tx)
153}
154
155fn classify_transaction_v1(tx: &TransactionView) -> Detection {
156    let mut scores: BTreeMap<AntiBotProvider, ProviderScore> = BTreeMap::new();
157
158    for provider in [
159        AntiBotProvider::DataDome,
160        AntiBotProvider::Cloudflare,
161        AntiBotProvider::Akamai,
162        AntiBotProvider::PerimeterX,
163        AntiBotProvider::Kasada,
164        AntiBotProvider::FingerprintCom,
165    ] {
166        let _prev = scores.insert(
167            provider,
168            ProviderScore {
169                provider,
170                score: 0,
171                markers: Vec::new(),
172            },
173        );
174    }
175
176    let normalized_headers = normalize_headers(&tx.response_headers);
177    let body = tx
178        .response_body_snippet
179        .as_ref()
180        .map_or_else(String::new, |s| s.to_lowercase());
181
182    let mut haystacks = String::new();
183    haystacks.push_str(&tx.url.to_lowercase());
184    haystacks.push('\n');
185    haystacks.push_str(&normalized_headers);
186    haystacks.push('\n');
187    haystacks.push_str(&body);
188
189    for sig in SIGNATURES {
190        if haystacks.contains(sig.needle)
191            && let Some(score) = scores.get_mut(&sig.provider)
192        {
193            score.score = score.score.saturating_add(sig.weight);
194            score.markers.push(sig.needle.to_string());
195        }
196    }
197
198    // 403/429 can increase confidence but does not imply a specific vendor.
199    if tx.status == 403 || tx.status == 429 {
200        for provider in [AntiBotProvider::DataDome, AntiBotProvider::Cloudflare] {
201            if let Some(score) = scores.get_mut(&provider)
202                && score.score > 0
203            {
204                score.score = score.score.saturating_add(1);
205                score.markers.push(format!("status:{}", tx.status));
206            }
207        }
208    }
209
210    let mut ordered: Vec<ProviderScore> = scores.into_values().collect();
211    ordered.sort_by_key(|score| Reverse(score.score));
212
213    let top = ordered.first();
214    let second = ordered.get(1);
215
216    match (top, second) {
217        (Some(primary), Some(secondary)) if primary.score > 0 => {
218            let denom = primary.score + secondary.score;
219            let confidence = if denom == 0 {
220                0.0
221            } else {
222                f64::from(primary.score) / f64::from(denom)
223            };
224            Detection {
225                provider: primary.provider,
226                confidence,
227                markers: primary.markers.clone(),
228            }
229        }
230        (Some(primary), _) if primary.score > 0 => Detection {
231            provider: primary.provider,
232            confidence: 1.0,
233            markers: primary.markers.clone(),
234        },
235        _ => Detection {
236            provider: AntiBotProvider::Unknown,
237            confidence: 0.0,
238            markers: Vec::new(),
239        },
240    }
241}
242
243/// Classify all entries in a HAR JSON payload.
244///
245/// # Errors
246///
247/// Returns [`har::HarError`] when the input is not valid HAR JSON
248/// or is missing required HAR fields.
249pub fn classify_har(har_json: &str) -> Result<HarClassificationReport, har::HarError> {
250    classify_har_with_profile(har_json, &AnalyzerProfile::default())
251}
252
253/// Classify a HAR payload using the analyzer selected by profile.
254///
255/// # Errors
256///
257/// Returns [`har::HarError`] when the input is not valid HAR JSON
258/// or is missing required HAR fields.
259pub fn classify_har_with_profile(
260    har_json: &str,
261    profile: &AnalyzerProfile,
262) -> Result<HarClassificationReport, har::HarError> {
263    select_analyzer(profile.analyzer_version).classify_har(har_json)
264}
265
/// Flatten a header map into lowercase `name:value` lines.
///
/// Each entry becomes `name:value\n` with both parts lowercased and no space
/// after the colon. Entries are emitted in the map's iteration order, i.e.
/// sorted by the original (not lowercased) header name. An empty map yields an
/// empty string.
fn normalize_headers(headers: &BTreeMap<String, String>) -> String {
    // Two extra bytes per entry for the ':' and '\n' separators. Lowercasing
    // can change the byte length of some non-ASCII text, so this is a sizing
    // hint that avoids repeated regrowth rather than an exact figure.
    let capacity: usize = headers
        .iter()
        .map(|(name, value)| name.len() + value.len() + 2)
        .sum();

    let mut normalized = String::with_capacity(capacity);
    for (name, value) in headers {
        normalized.push_str(&name.to_lowercase());
        normalized.push(':');
        normalized.push_str(&value.to_lowercase());
        normalized.push('\n');
    }
    normalized
}
276
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use super::*;

    /// Builds a `403` transaction for `url` carrying the given response
    /// headers and body snippet.
    fn blocked_transaction(url: &str, headers: &[(&str, &str)], body: &str) -> TransactionView {
        let response_headers: BTreeMap<String, String> = headers
            .iter()
            .map(|&(name, value)| (name.to_owned(), value.to_owned()))
            .collect();

        TransactionView {
            url: url.to_owned(),
            status: 403,
            response_headers,
            response_body_snippet: Some(body.to_owned()),
        }
    }

    #[test]
    fn classifies_datadome_from_headers() {
        let tx = blocked_transaction(
            "https://www.g2.com/",
            &[
                ("x-datadome", "protected"),
                ("x-datadome-cid", "abc"),
                ("set-cookie", "datadome=xyz; Path=/"),
            ],
            "Please enable JS",
        );

        let detection = classify_transaction(&tx);

        assert_eq!(detection.provider, AntiBotProvider::DataDome);
        assert!(detection.confidence > 0.5);
    }

    #[test]
    fn classifies_cloudflare_from_body_and_headers() {
        let tx = blocked_transaction(
            "https://www.capterra.com/",
            &[("server", "cloudflare"), ("cf-ray", "123-ORD")],
            "Attention Required! | Cloudflare",
        );

        let detection = classify_transaction(&tx);

        assert_eq!(detection.provider, AntiBotProvider::Cloudflare);
        assert!(detection.confidence > 0.5);
    }

    #[test]
    fn profile_selected_analyzer_matches_default_classifier_for_v1() {
        let tx = blocked_transaction(
            "https://example.com",
            &[("cf-ray", "123-ORD")],
            "Attention Required! | Cloudflare",
        );

        // An explicit V1 profile must agree with the default entry point.
        let profile = AnalyzerProfile {
            profile_id: "canary".to_string(),
            analyzer_version: AnalyzerVersion::V1,
        };
        let baseline = classify_transaction(&tx);
        let selected = classify_transaction_with_profile(&tx, &profile);

        assert_eq!(baseline, selected);
    }
}