1use std::cmp::Reverse;
2use std::collections::BTreeMap;
3
4use crate::analyzer::{AnalyzerProfile, AnalyzerVersion, ProviderAnalyzer};
5use crate::har;
6use crate::types::{
7 AntiBotProvider, Detection, HarClassificationReport, ProviderScore, TransactionView,
8};
9
10#[derive(Debug, Clone, Copy)]
11struct Signature {
12 needle: &'static str,
13 provider: AntiBotProvider,
14 weight: u32,
15}
16
17const SIGNATURES: &[Signature] = &[
18 Signature {
19 needle: "x-datadome",
20 provider: AntiBotProvider::DataDome,
21 weight: 5,
22 },
23 Signature {
24 needle: "x-datadome-cid",
25 provider: AntiBotProvider::DataDome,
26 weight: 5,
27 },
28 Signature {
29 needle: "x-dd-b",
30 provider: AntiBotProvider::DataDome,
31 weight: 4,
32 },
33 Signature {
34 needle: "datadome=",
35 provider: AntiBotProvider::DataDome,
36 weight: 4,
37 },
38 Signature {
39 needle: "captcha-delivery.com",
40 provider: AntiBotProvider::DataDome,
41 weight: 4,
42 },
43 Signature {
44 needle: "server:cloudflare",
45 provider: AntiBotProvider::Cloudflare,
46 weight: 3,
47 },
48 Signature {
49 needle: "cf-ray",
50 provider: AntiBotProvider::Cloudflare,
51 weight: 5,
52 },
53 Signature {
54 needle: "__cf_bm",
55 provider: AntiBotProvider::Cloudflare,
56 weight: 4,
57 },
58 Signature {
59 needle: "cdn-cgi/challenge-platform",
60 provider: AntiBotProvider::Cloudflare,
61 weight: 4,
62 },
63 Signature {
64 needle: "attention required! | cloudflare",
65 provider: AntiBotProvider::Cloudflare,
66 weight: 4,
67 },
68 Signature {
69 needle: "_abck",
70 provider: AntiBotProvider::Akamai,
71 weight: 5,
72 },
73 Signature {
74 needle: "bm_sv",
75 provider: AntiBotProvider::Akamai,
76 weight: 5,
77 },
78 Signature {
79 needle: "akamai",
80 provider: AntiBotProvider::Akamai,
81 weight: 2,
82 },
83 Signature {
84 needle: "_px",
85 provider: AntiBotProvider::PerimeterX,
86 weight: 5,
87 },
88 Signature {
89 needle: "perimeterx",
90 provider: AntiBotProvider::PerimeterX,
91 weight: 4,
92 },
93 Signature {
94 needle: "humansecurity",
95 provider: AntiBotProvider::PerimeterX,
96 weight: 3,
97 },
98 Signature {
99 needle: "x-kpsdk",
100 provider: AntiBotProvider::Kasada,
101 weight: 5,
102 },
103 Signature {
104 needle: "kasada",
105 provider: AntiBotProvider::Kasada,
106 weight: 4,
107 },
108 Signature {
109 needle: "x-fpjs",
110 provider: AntiBotProvider::FingerprintCom,
111 weight: 4,
112 },
113 Signature {
114 needle: "fingerprint.com",
115 provider: AntiBotProvider::FingerprintCom,
116 weight: 3,
117 },
118];
119
120struct SignatureAnalyzer {
121 version: AnalyzerVersion,
122}
123
124impl ProviderAnalyzer for SignatureAnalyzer {
125 fn version(&self) -> AnalyzerVersion {
126 self.version
127 }
128
129 fn classify_transaction(&self, tx: &TransactionView) -> Detection {
130 match self.version {
131 AnalyzerVersion::V1 | AnalyzerVersion::V1Legacy => classify_transaction_v1(tx),
132 }
133 }
134}
135
136const fn select_analyzer(version: AnalyzerVersion) -> SignatureAnalyzer {
137 SignatureAnalyzer { version }
138}
139
140#[must_use]
142pub fn classify_transaction(tx: &TransactionView) -> Detection {
143 classify_transaction_with_profile(tx, &AnalyzerProfile::default())
144}
145
146#[must_use]
148pub fn classify_transaction_with_profile(
149 tx: &TransactionView,
150 profile: &AnalyzerProfile,
151) -> Detection {
152 select_analyzer(profile.analyzer_version).classify_transaction(tx)
153}
154
155fn classify_transaction_v1(tx: &TransactionView) -> Detection {
156 let mut scores: BTreeMap<AntiBotProvider, ProviderScore> = BTreeMap::new();
157
158 for provider in [
159 AntiBotProvider::DataDome,
160 AntiBotProvider::Cloudflare,
161 AntiBotProvider::Akamai,
162 AntiBotProvider::PerimeterX,
163 AntiBotProvider::Kasada,
164 AntiBotProvider::FingerprintCom,
165 ] {
166 let _prev = scores.insert(
167 provider,
168 ProviderScore {
169 provider,
170 score: 0,
171 markers: Vec::new(),
172 },
173 );
174 }
175
176 let normalized_headers = normalize_headers(&tx.response_headers);
177 let body = tx
178 .response_body_snippet
179 .as_ref()
180 .map_or_else(String::new, |s| s.to_lowercase());
181
182 let mut haystacks = String::new();
183 haystacks.push_str(&tx.url.to_lowercase());
184 haystacks.push('\n');
185 haystacks.push_str(&normalized_headers);
186 haystacks.push('\n');
187 haystacks.push_str(&body);
188
189 for sig in SIGNATURES {
190 if haystacks.contains(sig.needle)
191 && let Some(score) = scores.get_mut(&sig.provider)
192 {
193 score.score = score.score.saturating_add(sig.weight);
194 score.markers.push(sig.needle.to_string());
195 }
196 }
197
198 if tx.status == 403 || tx.status == 429 {
200 for provider in [AntiBotProvider::DataDome, AntiBotProvider::Cloudflare] {
201 if let Some(score) = scores.get_mut(&provider)
202 && score.score > 0
203 {
204 score.score = score.score.saturating_add(1);
205 score.markers.push(format!("status:{}", tx.status));
206 }
207 }
208 }
209
210 let mut ordered: Vec<ProviderScore> = scores.into_values().collect();
211 ordered.sort_by_key(|score| Reverse(score.score));
212
213 let top = ordered.first();
214 let second = ordered.get(1);
215
216 match (top, second) {
217 (Some(primary), Some(secondary)) if primary.score > 0 => {
218 let denom = primary.score + secondary.score;
219 let confidence = if denom == 0 {
220 0.0
221 } else {
222 f64::from(primary.score) / f64::from(denom)
223 };
224 Detection {
225 provider: primary.provider,
226 confidence,
227 markers: primary.markers.clone(),
228 }
229 }
230 (Some(primary), _) if primary.score > 0 => Detection {
231 provider: primary.provider,
232 confidence: 1.0,
233 markers: primary.markers.clone(),
234 },
235 _ => Detection {
236 provider: AntiBotProvider::Unknown,
237 confidence: 0.0,
238 markers: Vec::new(),
239 },
240 }
241}
242
243pub fn classify_har(har_json: &str) -> Result<HarClassificationReport, har::HarError> {
250 classify_har_with_profile(har_json, &AnalyzerProfile::default())
251}
252
253pub fn classify_har_with_profile(
260 har_json: &str,
261 profile: &AnalyzerProfile,
262) -> Result<HarClassificationReport, har::HarError> {
263 select_analyzer(profile.analyzer_version).classify_har(har_json)
264}
265
/// Flattens headers into lowercase `name:value` lines, each terminated by `\n`, in the
/// map's key order. An empty map yields an empty string.
fn normalize_headers(headers: &BTreeMap<String, String>) -> String {
    headers
        .iter()
        .fold(String::new(), |mut flattened, (name, value)| {
            flattened.push_str(&name.to_lowercase());
            flattened.push(':');
            flattened.push_str(&value.to_lowercase());
            flattened.push('\n');
            flattened
        })
}
276
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use super::*;

    /// Builds a 403 response fixture from borrowed URL, header pairs and body text.
    fn blocked_response(url: &str, headers: &[(&str, &str)], body: &str) -> TransactionView {
        let response_headers: BTreeMap<String, String> = headers
            .iter()
            .map(|&(name, value)| (name.to_string(), value.to_string()))
            .collect();

        TransactionView {
            url: url.to_string(),
            status: 403,
            response_headers,
            response_body_snippet: Some(body.to_string()),
        }
    }

    #[test]
    fn classifies_datadome_from_headers() {
        let tx = blocked_response(
            "https://www.g2.com/",
            &[
                ("x-datadome", "protected"),
                ("x-datadome-cid", "abc"),
                ("set-cookie", "datadome=xyz; Path=/"),
            ],
            "Please enable JS",
        );

        let detection = classify_transaction(&tx);

        assert_eq!(detection.provider, AntiBotProvider::DataDome);
        assert!(detection.confidence > 0.5);
    }

    #[test]
    fn classifies_cloudflare_from_body_and_headers() {
        let tx = blocked_response(
            "https://www.capterra.com/",
            &[("server", "cloudflare"), ("cf-ray", "123-ORD")],
            "Attention Required! | Cloudflare",
        );

        let detection = classify_transaction(&tx);

        assert_eq!(detection.provider, AntiBotProvider::Cloudflare);
        assert!(detection.confidence > 0.5);
    }

    #[test]
    fn profile_selected_analyzer_matches_default_classifier_for_v1() {
        let tx = blocked_response(
            "https://example.com",
            &[("cf-ray", "123-ORD")],
            "Attention Required! | Cloudflare",
        );
        let profile = AnalyzerProfile {
            profile_id: "canary".to_string(),
            analyzer_version: AnalyzerVersion::V1,
        };

        let baseline = classify_transaction(&tx);
        let selected = classify_transaction_with_profile(&tx, &profile);

        assert_eq!(baseline, selected);
    }
}