Skip to main content

stygian_charon/
probe.rs

1use std::collections::BTreeMap;
2
3use serde::{Deserialize, Serialize};
4
5use crate::classifier::classify_transaction;
6use crate::types::{AntiBotProvider, Detection, TransactionView};
7
8/// Classification of how a probe exercises the detection system.
9///
10/// Used to group expected behaviour in regression runs.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
12pub enum ProbeCategory {
13    /// Clean, everyday traffic with no bot-protection signals.
14    Benign,
15    /// Partial or ambiguous signals that may or may not trigger detection.
16    Suspicious,
17    /// Full adversarial signals; a well-tuned analyzer must return the expected provider.
18    Adversarial,
19    /// Edge cases that exercise boundary conditions (empty headers, unusual status codes, etc.).
20    EdgeCase,
21}
22
23/// Expected detection outcome for a probe.
24///
25/// A probe passes when the classified provider matches `expected_provider` and
26/// the confidence is at least `min_confidence`.
27#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
28pub struct ProbeExpectation {
29    /// Provider the probe expects to detect.
30    pub expected_provider: AntiBotProvider,
31    /// Minimum confidence threshold in `[0.0, 1.0]`.  `0.0` accepts any confidence.
32    pub min_confidence: f64,
33}
34
35/// A single challenge-style probe with its input, expected outcome, and metadata.
36///
37/// # Example
38///
39/// ```rust
40/// use stygian_charon::probe::{ChallengeProbe, ProbeCategory, ProbeExpectation};
41/// use stygian_charon::types::{AntiBotProvider, TransactionView};
42/// use std::collections::BTreeMap;
43///
44/// let probe = ChallengeProbe {
45///     name: "cf-ray-header".to_string(),
46///     description: "Cloudflare CF-Ray header present".to_string(),
47///     category: ProbeCategory::Adversarial,
48///     transaction: TransactionView {
49///         url: "https://example.com/".to_string(),
50///         status: 403,
51///         response_headers: {
52///             let mut h = BTreeMap::new();
53///             h.insert("cf-ray".to_string(), "abc123-LHR".to_string());
54///             h
55///         },
56///         response_body_snippet: None,
57///     },
58///     expectation: ProbeExpectation {
59///         expected_provider: AntiBotProvider::Cloudflare,
60///         min_confidence: 0.5,
61///     },
62/// };
63/// ```
64#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
65pub struct ChallengeProbe {
66    /// Short identifier used in reports.
67    pub name: String,
68    /// Human-readable description of what this probe covers.
69    pub description: String,
70    /// Probe category.
71    pub category: ProbeCategory,
72    /// Synthetic transaction to classify.
73    pub transaction: TransactionView,
74    /// Expected outcome.
75    pub expectation: ProbeExpectation,
76}
77
78/// Outcome of running a single probe.
79#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
80pub struct ProbeRunResult {
81    /// Probe name.
82    pub name: String,
83    /// Probe category.
84    pub category: ProbeCategory,
85    /// Actual detection produced by the classifier.
86    pub actual: Detection,
87    /// Expected outcome.
88    pub expectation: ProbeExpectation,
89    /// Whether the probe passed.
90    pub passed: bool,
91    /// Failure reason when `!passed`.
92    pub failure_reason: Option<String>,
93}
94
95/// Summary report produced by running a full probe pack.
96///
97/// # Example
98///
99/// ```rust
100/// use stygian_charon::probe::{run_probe_pack, challenge_probe_pack};
101///
102/// let report = run_probe_pack(&challenge_probe_pack());
103/// assert_eq!(report.total, report.passed + report.failed);
104/// ```
105#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
106pub struct ProbePackReport {
107    /// Total probes run.
108    pub total: usize,
109    /// Probes that passed.
110    pub passed: usize,
111    /// Probes that failed.
112    pub failed: usize,
113    /// Individual results (sorted: failures first, then by category, then by name).
114    pub results: Vec<ProbeRunResult>,
115    /// Whether the full pack passed with no failures.
116    pub all_passed: bool,
117}
118
119impl ProbePackReport {
120    /// Returns only the failed results.
121    #[must_use]
122    pub fn failures(&self) -> Vec<&ProbeRunResult> {
123        self.results.iter().filter(|r| !r.passed).collect()
124    }
125}
126
127/// Run a probe pack against the default `V1` classifier and return a report.
128///
129/// # Example
130///
131/// ```rust
132/// use stygian_charon::probe::{run_probe_pack, challenge_probe_pack};
133///
134/// let report = run_probe_pack(&challenge_probe_pack());
135/// assert!(report.all_passed, "probe pack regressions: {:?}", report.failures());
136/// ```
137#[must_use]
138pub fn run_probe_pack(probes: &[ChallengeProbe]) -> ProbePackReport {
139    let mut results: Vec<ProbeRunResult> = probes.iter().map(run_one_probe).collect();
140
141    results.sort_by(|a, b| {
142        b.passed
143            .cmp(&a.passed)
144            .then(a.category.cmp(&b.category))
145            .then(a.name.cmp(&b.name))
146    });
147
148    let passed = results.iter().filter(|r| r.passed).count();
149    let failed = results.len() - passed;
150    let total = results.len();
151
152    ProbePackReport {
153        total,
154        passed,
155        failed,
156        all_passed: failed == 0,
157        results,
158    }
159}
160
161fn run_one_probe(probe: &ChallengeProbe) -> ProbeRunResult {
162    let actual = classify_transaction(&probe.transaction);
163
164    let passed = actual.provider == probe.expectation.expected_provider
165        && actual.confidence >= probe.expectation.min_confidence;
166
167    let failure_reason = if passed {
168        None
169    } else {
170        let mut reasons = Vec::new();
171        if actual.provider != probe.expectation.expected_provider {
172            reasons.push(format!(
173                "provider: expected {:?}, got {:?}",
174                probe.expectation.expected_provider, actual.provider
175            ));
176        }
177        if actual.confidence < probe.expectation.min_confidence {
178            reasons.push(format!(
179                "confidence: expected >= {:.2}, got {:.2}",
180                probe.expectation.min_confidence, actual.confidence
181            ));
182        }
183        Some(reasons.join("; "))
184    };
185
186    ProbeRunResult {
187        name: probe.name.clone(),
188        category: probe.category,
189        actual,
190        expectation: probe.expectation.clone(),
191        passed,
192        failure_reason,
193    }
194}
195
196/// Build the canonical challenge probe pack.
197///
198/// Returns the built-in set of benign, suspicious, adversarial, and edge-case probes.
199///
200/// # Example
201///
202/// ```rust
203/// use stygian_charon::probe::challenge_probe_pack;
204///
205/// let probes = challenge_probe_pack();
206/// assert!(!probes.is_empty());
207/// ```
208#[must_use]
209pub fn challenge_probe_pack() -> Vec<ChallengeProbe> {
210    let mut probes = Vec::new();
211    probes.extend(build_benign_probes());
212    probes.extend(build_suspicious_probes());
213    probes.extend(build_adversarial_probes());
214    probes.extend(build_edge_case_probes());
215    probes
216}
217
218fn build_benign_probes() -> Vec<ChallengeProbe> {
219    vec![
220        ChallengeProbe {
221            name: "benign-200-ok".to_string(),
222            description: "Plain 200 OK response with no anti-bot headers".to_string(),
223            category: ProbeCategory::Benign,
224            transaction: TransactionView {
225                url: "https://example.com/page".to_string(),
226                status: 200,
227                response_headers: BTreeMap::new(),
228                response_body_snippet: None,
229            },
230            expectation: ProbeExpectation {
231                expected_provider: AntiBotProvider::Unknown,
232                min_confidence: 0.0,
233            },
234        },
235        ChallengeProbe {
236            name: "benign-cdn-headers".to_string(),
237            description: "Standard CDN headers that share no anti-bot signals".to_string(),
238            category: ProbeCategory::Benign,
239            transaction: TransactionView {
240                url: "https://example.com/api/v1/data".to_string(),
241                status: 200,
242                response_headers: {
243                    let mut h = BTreeMap::new();
244                    h.insert("content-type".to_string(), "application/json".to_string());
245                    h.insert(
246                        "cache-control".to_string(),
247                        "public, max-age=3600".to_string(),
248                    );
249                    h.insert("x-cache".to_string(), "HIT".to_string());
250                    h
251                },
252                response_body_snippet: None,
253            },
254            expectation: ProbeExpectation {
255                expected_provider: AntiBotProvider::Unknown,
256                min_confidence: 0.0,
257            },
258        },
259    ]
260}
261
262fn build_suspicious_probes() -> Vec<ChallengeProbe> {
263    vec![
264        ChallengeProbe {
265            name: "suspicious-akamai-partial".to_string(),
266            description: "Single low-weight Akamai marker; should detect Akamai".to_string(),
267            category: ProbeCategory::Suspicious,
268            transaction: TransactionView {
269                url: "https://example.com/product".to_string(),
270                status: 200,
271                response_headers: {
272                    let mut h = BTreeMap::new();
273                    h.insert("server".to_string(), "AkamaiGHost".to_string());
274                    h
275                },
276                response_body_snippet: Some("akamai".to_string()),
277            },
278            expectation: ProbeExpectation {
279                expected_provider: AntiBotProvider::Akamai,
280                min_confidence: 0.0,
281            },
282        },
283        ChallengeProbe {
284            name: "suspicious-fingerprint-partial".to_string(),
285            description: "One FingerprintJS URL reference in body".to_string(),
286            category: ProbeCategory::Suspicious,
287            transaction: TransactionView {
288                url: "https://example.com/checkout".to_string(),
289                status: 200,
290                response_headers: BTreeMap::new(),
291                response_body_snippet: Some("fingerprint.com/v3/agent".to_string()),
292            },
293            expectation: ProbeExpectation {
294                expected_provider: AntiBotProvider::FingerprintCom,
295                min_confidence: 0.0,
296            },
297        },
298    ]
299}
300
301fn build_adversarial_probes() -> Vec<ChallengeProbe> {
302    let mut probes = Vec::new();
303    probes.extend(build_adversarial_probes_part_one());
304    probes.extend(build_adversarial_probes_part_two());
305    probes
306}
307
308fn build_adversarial_probes_part_one() -> Vec<ChallengeProbe> {
309    vec![
310        ChallengeProbe {
311            name: "adversarial-datadome-full".to_string(),
312            description: "Full DataDome challenge: x-datadome + cookie + captcha URL".to_string(),
313            category: ProbeCategory::Adversarial,
314            transaction: TransactionView {
315                url: "https://target.com/page".to_string(),
316                status: 403,
317                response_headers: {
318                    let mut h = BTreeMap::new();
319                    h.insert("x-datadome".to_string(), "1".to_string());
320                    h.insert("x-datadome-cid".to_string(), "abc123".to_string());
321                    h.insert(
322                        "set-cookie".to_string(),
323                        "datadome=xyz; Domain=.target.com".to_string(),
324                    );
325                    h
326                },
327                response_body_snippet: Some(
328                    "Redirecting to captcha-delivery.com/captcha".to_string(),
329                ),
330            },
331            expectation: ProbeExpectation {
332                expected_provider: AntiBotProvider::DataDome,
333                min_confidence: 0.5,
334            },
335        },
336        ChallengeProbe {
337            name: "adversarial-cloudflare-challenge".to_string(),
338            description: "Cloudflare challenge page: CF-Ray + __cf_bm cookie + server header"
339                .to_string(),
340            category: ProbeCategory::Adversarial,
341            transaction: TransactionView {
342                url: "https://target.com/".to_string(),
343                status: 403,
344                response_headers: {
345                    let mut h = BTreeMap::new();
346                    h.insert("cf-ray".to_string(), "7a1b2c3d4e5f-LHR".to_string());
347                    h.insert("server".to_string(), "cloudflare".to_string());
348                    h.insert(
349                        "set-cookie".to_string(),
350                        "__cf_bm=token; SameSite=None".to_string(),
351                    );
352                    h
353                },
354                response_body_snippet: Some("Attention Required! | Cloudflare".to_string()),
355            },
356            expectation: ProbeExpectation {
357                expected_provider: AntiBotProvider::Cloudflare,
358                min_confidence: 0.5,
359            },
360        },
361        ChallengeProbe {
362            name: "adversarial-akamai-bot-manager".to_string(),
363            description: "Akamai Bot Manager: _abck + bm_sv cookies".to_string(),
364            category: ProbeCategory::Adversarial,
365            transaction: TransactionView {
366                url: "https://target.com/cart".to_string(),
367                status: 200,
368                response_headers: {
369                    let mut h = BTreeMap::new();
370                    h.insert(
371                        "set-cookie".to_string(),
372                        "_abck=sensor_data; bm_sv=session_token".to_string(),
373                    );
374                    h
375                },
376                response_body_snippet: None,
377            },
378            expectation: ProbeExpectation {
379                expected_provider: AntiBotProvider::Akamai,
380                min_confidence: 0.5,
381            },
382        },
383    ]
384}
385
386fn build_adversarial_probes_part_two() -> Vec<ChallengeProbe> {
387    vec![
388        ChallengeProbe {
389            name: "adversarial-perimeterx-block".to_string(),
390            description: "PerimeterX / Human Security block page".to_string(),
391            category: ProbeCategory::Adversarial,
392            transaction: TransactionView {
393                url: "https://target.com/search".to_string(),
394                status: 403,
395                response_headers: {
396                    let mut h = BTreeMap::new();
397                    h.insert("set-cookie".to_string(), "_px3=payload; Path=/".to_string());
398                    h
399                },
400                response_body_snippet: Some("perimeterx access denied".to_string()),
401            },
402            expectation: ProbeExpectation {
403                expected_provider: AntiBotProvider::PerimeterX,
404                min_confidence: 0.5,
405            },
406        },
407        ChallengeProbe {
408            name: "adversarial-kasada-block".to_string(),
409            description: "Kasada block with x-kpsdk header".to_string(),
410            category: ProbeCategory::Adversarial,
411            transaction: TransactionView {
412                url: "https://target.com/api/checkout".to_string(),
413                status: 429,
414                response_headers: {
415                    let mut h = BTreeMap::new();
416                    h.insert("x-kpsdk-ct".to_string(), "kasada-token".to_string());
417                    h.insert("x-kpsdk-cd".to_string(), "challenge-data".to_string());
418                    h
419                },
420                response_body_snippet: Some("kasada protection active".to_string()),
421            },
422            expectation: ProbeExpectation {
423                expected_provider: AntiBotProvider::Kasada,
424                min_confidence: 0.5,
425            },
426        },
427        ChallengeProbe {
428            name: "adversarial-fingerprintcom-full".to_string(),
429            description: "FingerprintJS Pro with x-fpjs header and body reference".to_string(),
430            category: ProbeCategory::Adversarial,
431            transaction: TransactionView {
432                url: "https://target.com/auth".to_string(),
433                status: 200,
434                response_headers: {
435                    let mut h = BTreeMap::new();
436                    h.insert("x-fpjs-region".to_string(), "us-east-1".to_string());
437                    h
438                },
439                response_body_snippet: Some(
440                    "https://api.fingerprint.com/v3/agent?apiKey=xyz".to_string(),
441                ),
442            },
443            expectation: ProbeExpectation {
444                expected_provider: AntiBotProvider::FingerprintCom,
445                min_confidence: 0.5,
446            },
447        },
448    ]
449}
450
451fn build_edge_case_probes() -> Vec<ChallengeProbe> {
452    vec![
453        ChallengeProbe {
454            name: "edge-empty-headers".to_string(),
455            description: "Transaction with no headers and no body; must not panic".to_string(),
456            category: ProbeCategory::EdgeCase,
457            transaction: TransactionView {
458                url: "https://example.com/".to_string(),
459                status: 200,
460                response_headers: BTreeMap::new(),
461                response_body_snippet: None,
462            },
463            expectation: ProbeExpectation {
464                expected_provider: AntiBotProvider::Unknown,
465                min_confidence: 0.0,
466            },
467        },
468        ChallengeProbe {
469            name: "edge-status-0".to_string(),
470            description: "Status code 0 (network error / timeout); must not panic".to_string(),
471            category: ProbeCategory::EdgeCase,
472            transaction: TransactionView {
473                url: "https://example.com/".to_string(),
474                status: 0,
475                response_headers: BTreeMap::new(),
476                response_body_snippet: None,
477            },
478            expectation: ProbeExpectation {
479                expected_provider: AntiBotProvider::Unknown,
480                min_confidence: 0.0,
481            },
482        },
483        ChallengeProbe {
484            name: "edge-mixed-case-header".to_string(),
485            description: "CF-Ray header with mixed case; classifier should normalise".to_string(),
486            category: ProbeCategory::EdgeCase,
487            transaction: TransactionView {
488                url: "https://target.com/".to_string(),
489                status: 200,
490                response_headers: {
491                    let mut h = BTreeMap::new();
492                    h.insert("CF-Ray".to_string(), "1234567890ab-SYD".to_string());
493                    h
494                },
495                response_body_snippet: None,
496            },
497            expectation: ProbeExpectation {
498                expected_provider: AntiBotProvider::Cloudflare,
499                min_confidence: 0.5,
500            },
501        },
502    ]
503}
504
#[cfg(test)]
mod tests {
    use super::*;

    /// Every probe in the built-in pack must meet its expectation.
    #[test]
    fn challenge_probe_pack_all_pass() {
        let report = run_probe_pack(&challenge_probe_pack());
        assert!(
            report.all_passed,
            "probe pack regressions detected:\n{:#?}",
            report.failures()
        );
    }

    /// `total` matches the input length and splits exactly into passed + failed.
    #[test]
    fn probe_pack_report_counts_are_consistent() {
        let pack = challenge_probe_pack();
        let report = run_probe_pack(&pack);
        assert_eq!(report.total, pack.len());
        assert_eq!(report.passed + report.failed, report.total);
    }

    /// The built-in pack contains at least one probe of every category.
    #[test]
    fn probe_pack_has_all_categories() {
        let pack = challenge_probe_pack();
        let seen: std::collections::BTreeSet<ProbeCategory> =
            pack.iter().map(|probe| probe.category).collect();
        assert!(seen.contains(&ProbeCategory::Benign));
        assert!(seen.contains(&ProbeCategory::Suspicious));
        assert!(seen.contains(&ProbeCategory::Adversarial));
        assert!(seen.contains(&ProbeCategory::EdgeCase));
    }
}
537}