// stygian_charon/backtest.rs

1use std::collections::{BTreeMap, BTreeSet};
2
3use serde::{Deserialize, Serialize};
4use thiserror::Error;
5
6use crate::analyzer::{AnalyzerProfile, AnalyzerVersion};
7use crate::classifier::classify_har_with_profile;
8use crate::har;
9use crate::types::AntiBotProvider;
10
11/// One historical HAR input used for profile backtesting.
12#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
13pub struct BacktestCase {
14    /// Stable case identifier (fixture name, date, or target key).
15    pub case_id: String,
16    /// HAR payload to replay for all profiles.
17    pub har_json: String,
18}
19
20/// One profile result for one backtest case.
21#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
22pub struct BacktestSample {
23    /// Case identifier from the source corpus.
24    pub case_id: String,
25    /// Profile identifier used for this run.
26    pub profile_id: String,
27    /// Analyzer version that executed this sample.
28    pub analyzer_version: AnalyzerVersion,
29    /// Aggregate provider prediction for the replayed HAR.
30    pub provider: AntiBotProvider,
31    /// Confidence score for the aggregate prediction.
32    pub confidence: f64,
33    /// Number of requests classified in this case.
34    pub request_count: usize,
35    /// Number of suspicious requests (blocked/challenged or non-unknown provider).
36    pub suspicious_request_count: usize,
37}
38
39/// Disagreement detected across profiles for a single case.
40#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
41pub struct BacktestDisagreement {
42    /// Case where profiles disagreed.
43    pub case_id: String,
44    /// Provider selected by each profile.
45    pub providers_by_profile: BTreeMap<String, AntiBotProvider>,
46}
47
48/// Per-profile aggregate metrics computed from backtest samples.
49///
50/// Metrics help identify profiles that underperform compared to baseline,
51/// enabling data-driven decisions about rule rollout and SLO adjustments.
52#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
53pub struct ProfileMetrics {
54    /// Profile identifier.
55    pub profile_id: String,
56    /// Total cases analyzed for this profile.
57    pub total_samples: usize,
58    /// Average confidence score across all samples (0.0–1.0).
59    pub avg_confidence: f64,
60    /// Percentage of samples where this profile detected suspicious activity.
61    pub detection_rate: f64,
62    /// Number of disagreement cases where this profile diverged from other profiles.
63    pub disagreement_count: usize,
64    /// Number of cases with confidence < 0.5 (potentially false positives).
65    pub low_confidence_count: usize,
66    /// Ratio of cases with low confidence (0.0–1.0).
67    pub low_confidence_rate: f64,
68}
69
70/// Aggregate output for profile backtesting over historical HARs.
71#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub struct BacktestReport {
73    /// Number of cases in the input corpus.
74    pub total_cases: usize,
75    /// Number of profiles replayed for each case.
76    pub total_profiles: usize,
77    /// Flattened case x profile matrix of results.
78    pub samples: Vec<BacktestSample>,
79    /// Cases where profile predictions diverged.
80    pub disagreements: Vec<BacktestDisagreement>,
81    /// Aggregate metrics per profile (optional; computed on demand).
82    #[serde(default)]
83    pub profile_metrics: BTreeMap<String, ProfileMetrics>,
84}
85
86/// Errors returned by [`run_profile_backtest`].
87#[derive(Debug, Error)]
88pub enum BacktestError {
89    /// No cases were provided.
90    #[error("backtest corpus must contain at least one case")]
91    EmptyCorpus,
92    /// No profiles were provided.
93    #[error("backtest must include at least one analyzer profile")]
94    EmptyProfiles,
95    /// HAR payload parsing/classification failed.
96    #[error(transparent)]
97    Har(#[from] har::HarError),
98}
99
100/// Replay historical HAR cases against existing analyzer profiles.
101///
102/// # Errors
103///
104/// Returns [`BacktestError::EmptyCorpus`] when no cases are supplied,
105/// [`BacktestError::EmptyProfiles`] when no profiles are supplied, and
106/// [`BacktestError::Har`] when any HAR payload cannot be parsed.
107///
108/// # Example
109///
110/// ```rust
111/// use stygian_charon::AnalyzerProfile;
112/// use stygian_charon::AnalyzerVersion;
113/// use stygian_charon::BacktestCase;
114/// use stygian_charon::run_profile_backtest;
115///
116/// let corpus = vec![BacktestCase {
117///     case_id: "fixture-a".to_string(),
118///     har_json: r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[{"startedDateTime":"2026-01-01T00:00:00Z","time":1,"request":{"method":"GET","url":"https://example.com","httpVersion":"HTTP/1.1","headers":[],"queryString":[],"cookies":[],"headersSize":-1,"bodySize":-1},"response":{"status":403,"statusText":"Forbidden","httpVersion":"HTTP/1.1","headers":[{"name":"cf-ray","value":"abc"}],"cookies":[],"content":{"size":0,"mimeType":"text/html","text":"Attention Required! | Cloudflare"},"redirectURL":"","headersSize":-1,"bodySize":-1},"cache":{},"timings":{"send":0,"wait":1,"receive":0}}]}}"#.to_string(),
119/// }];
120///
121/// let profiles = vec![AnalyzerProfile {
122///     profile_id: "default".to_string(),
123///     analyzer_version: AnalyzerVersion::V1,
124/// }];
125///
126/// let report = run_profile_backtest(&corpus, &profiles).unwrap();
127/// assert_eq!(report.samples.len(), 1);
128/// ```
129pub fn run_profile_backtest(
130    corpus: &[BacktestCase],
131    profiles: &[AnalyzerProfile],
132) -> Result<BacktestReport, BacktestError> {
133    if corpus.is_empty() {
134        return Err(BacktestError::EmptyCorpus);
135    }
136    if profiles.is_empty() {
137        return Err(BacktestError::EmptyProfiles);
138    }
139
140    let mut samples = Vec::new();
141
142    for case in corpus {
143        for profile in profiles {
144            let report = classify_har_with_profile(&case.har_json, profile)?;
145            let suspicious_request_count = report
146                .requests
147                .iter()
148                .filter(|request| {
149                    request.status == 403
150                        || request.status == 429
151                        || request.detection.provider != AntiBotProvider::Unknown
152                })
153                .count();
154
155            samples.push(BacktestSample {
156                case_id: case.case_id.clone(),
157                profile_id: profile.profile_id.clone(),
158                analyzer_version: profile.analyzer_version,
159                provider: report.aggregate.provider,
160                confidence: report.aggregate.confidence,
161                request_count: report.requests.len(),
162                suspicious_request_count,
163            });
164        }
165    }
166
167    let disagreements = compute_disagreements(&samples);
168    let profile_metrics = compute_profile_metrics(&samples, &disagreements);
169
170    Ok(BacktestReport {
171        total_cases: corpus.len(),
172        total_profiles: profiles.len(),
173        samples,
174        disagreements,
175        profile_metrics,
176    })
177}
178
179fn compute_disagreements(samples: &[BacktestSample]) -> Vec<BacktestDisagreement> {
180    let mut by_case: BTreeMap<&str, BTreeMap<String, AntiBotProvider>> = BTreeMap::new();
181
182    for sample in samples {
183        let entry = by_case.entry(&sample.case_id).or_default();
184        let _ = entry.insert(sample.profile_id.clone(), sample.provider);
185    }
186
187    by_case
188        .into_iter()
189        .filter_map(|(case_id, providers_by_profile)| {
190            let unique: BTreeSet<AntiBotProvider> =
191                providers_by_profile.values().copied().collect();
192            (unique.len() > 1).then(|| BacktestDisagreement {
193                case_id: case_id.to_string(),
194                providers_by_profile,
195            })
196        })
197        .collect()
198}
199
/// Convert a count to `f64` without an `as` cast.
///
/// The value is widened to `u64` (saturating at `u64::MAX` on platforms where
/// `usize` is wider) and split into two 32-bit halves, each of which converts
/// to `f64` losslessly. Values up to 2^53 are therefore represented exactly and
/// larger values are rounded to the nearest `f64`, instead of being clamped to
/// `u32::MAX`, which would skew any ratio or average built from large counts.
fn usize_to_f64_saturating(value: usize) -> f64 {
    // 2^32: the weight of the upper 32-bit half.
    const HIGH_HALF_WEIGHT: f64 = 4_294_967_296.0;

    let wide = u64::try_from(value).unwrap_or(u64::MAX);
    // Both halves always fit in `u32`; the fallbacks only keep the code panic-free.
    let high = u32::try_from(wide >> 32).unwrap_or(u32::MAX);
    let low = u32::try_from(wide & u64::from(u32::MAX)).unwrap_or(u32::MAX);

    // `high * 2^32` is exact in `f64`, so the sum is rounded at most once.
    f64::from(high) * HIGH_HALF_WEIGHT + f64::from(low)
}
203
204fn ratio_from_counts(numerator: usize, denominator: usize) -> f64 {
205    if denominator == 0 {
206        0.0
207    } else {
208        usize_to_f64_saturating(numerator) / usize_to_f64_saturating(denominator)
209    }
210}
211
212/// Compute per-profile aggregate metrics from backtest samples and disagreements.
213///
214/// Metrics include detection rate, average confidence, and disagreement frequency,
215/// which inform acceptance decisions during rule rollout.
216fn compute_profile_metrics(
217    samples: &[BacktestSample],
218    disagreements: &[BacktestDisagreement],
219) -> BTreeMap<String, ProfileMetrics> {
220    let mut by_profile: BTreeMap<String, Vec<&BacktestSample>> = BTreeMap::new();
221
222    // Group samples by profile
223    for sample in samples {
224        by_profile
225            .entry(sample.profile_id.clone())
226            .or_default()
227            .push(sample);
228    }
229
230    // Count disagreements per profile
231    let mut disagreement_counts: BTreeMap<String, usize> = BTreeMap::new();
232    for disagreement in disagreements {
233        for profile_id in disagreement.providers_by_profile.keys() {
234            *disagreement_counts.entry(profile_id.clone()).or_insert(0) += 1;
235        }
236    }
237
238    // Compute metrics for each profile
239    by_profile
240        .into_iter()
241        .map(|(profile_id, profile_samples)| {
242            let total_samples = profile_samples.len();
243
244            // Detection rate: percentage of cases with non-Unknown provider
245            let detected_count = profile_samples
246                .iter()
247                .filter(|s| s.provider != AntiBotProvider::Unknown)
248                .count();
249            let detection_rate = ratio_from_counts(detected_count, total_samples);
250
251            // Average confidence score
252            let avg_confidence = if total_samples > 0 {
253                profile_samples.iter().map(|s| s.confidence).sum::<f64>()
254                    / usize_to_f64_saturating(total_samples)
255            } else {
256                0.0
257            };
258
259            // Low confidence (potential false positive indicator)
260            let low_confidence_count = profile_samples
261                .iter()
262                .filter(|s| s.confidence < 0.5)
263                .count();
264            let low_confidence_rate = ratio_from_counts(low_confidence_count, total_samples);
265
266            // Disagreement count for this profile
267            let disagreement_count = disagreement_counts.get(&profile_id).copied().unwrap_or(0);
268
269            (
270                profile_id.clone(),
271                ProfileMetrics {
272                    profile_id,
273                    total_samples,
274                    avg_confidence,
275                    detection_rate,
276                    disagreement_count,
277                    low_confidence_count,
278                    low_confidence_rate,
279                },
280            )
281        })
282        .collect()
283}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal HAR with one 403 response carrying Cloudflare headers and body text.
    const CLOUDFLARE_HAR: &str = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[{"startedDateTime":"2026-01-01T00:00:00Z","time":1,"request":{"method":"GET","url":"https://example.com","httpVersion":"HTTP/1.1","headers":[],"queryString":[],"cookies":[],"headersSize":-1,"bodySize":-1},"response":{"status":403,"statusText":"Forbidden","httpVersion":"HTTP/1.1","headers":[{"name":"cf-ray","value":"abc"},{"name":"server","value":"cloudflare"}],"cookies":[],"content":{"size":0,"mimeType":"text/html","text":"Attention Required! | Cloudflare"},"redirectURL":"","headersSize":-1,"bodySize":-1},"cache":{},"timings":{"send":0,"wait":1,"receive":0}}]}}"#;

    /// One-case corpus built from [`CLOUDFLARE_HAR`].
    fn cloudflare_corpus() -> Vec<BacktestCase> {
        vec![BacktestCase {
            case_id: "case-1".to_string(),
            har_json: CLOUDFLARE_HAR.to_string(),
        }]
    }

    /// Single `default` profile on analyzer version V1.
    fn default_profiles() -> Vec<AnalyzerProfile> {
        vec![AnalyzerProfile {
            profile_id: "default".to_string(),
            analyzer_version: AnalyzerVersion::V1,
        }]
    }

    /// Sample for `case-1` with one request that is also counted as suspicious.
    fn case_one_sample(
        profile_id: &str,
        analyzer_version: AnalyzerVersion,
        provider: AntiBotProvider,
        confidence: f64,
    ) -> BacktestSample {
        BacktestSample {
            case_id: "case-1".to_string(),
            profile_id: profile_id.to_string(),
            analyzer_version,
            provider,
            confidence,
            request_count: 1,
            suspicious_request_count: 1,
        }
    }

    #[test]
    fn backtest_generates_case_profile_matrix() {
        let outcome = run_profile_backtest(&cloudflare_corpus(), &default_profiles());
        assert!(outcome.is_ok());

        // Unreachable after the assertion above; avoids unwrapping in tests.
        let Ok(report) = outcome else {
            return;
        };

        assert_eq!(report.total_cases, 1);
        assert_eq!(report.total_profiles, 1);
        assert_eq!(report.samples.len(), 1);

        let first = report.samples.first();
        assert!(first.is_some(), "expected at least one sample");
        assert_eq!(
            first.map(|sample| sample.provider),
            Some(AntiBotProvider::Cloudflare)
        );

        assert!(report.disagreements.is_empty());
    }

    #[test]
    fn backtest_rejects_empty_inputs() {
        let no_cases = run_profile_backtest(&[], &default_profiles());
        assert!(matches!(no_cases, Err(BacktestError::EmptyCorpus)));

        let no_profiles = run_profile_backtest(&cloudflare_corpus(), &[]);
        assert!(matches!(no_profiles, Err(BacktestError::EmptyProfiles)));
    }

    #[test]
    fn compute_disagreements_flags_divergent_predictions() {
        let samples = vec![
            case_one_sample(
                "profile-a",
                AnalyzerVersion::V1,
                AntiBotProvider::Cloudflare,
                0.9,
            ),
            case_one_sample(
                "profile-b",
                AnalyzerVersion::V1Legacy,
                AntiBotProvider::DataDome,
                0.8,
            ),
        ];

        let disagreements = compute_disagreements(&samples);
        assert_eq!(disagreements.len(), 1);

        let first = disagreements.first();
        assert!(first.is_some(), "expected one disagreement");
        assert_eq!(
            first.map(|disagreement| disagreement.case_id.as_str()),
            Some("case-1")
        );
    }
}