Skip to main content

stygian_charon/
investigation.rs

1use std::collections::{BTreeMap, BTreeSet};
2
3use crate::analyzer::AnalyzerProfile;
4#[cfg(feature = "caching")]
5use crate::cache::{InvestigationReportCache, investigation_cache_key};
6use crate::classifier::{classify_transaction, classify_transaction_with_profile};
7use crate::har;
8use crate::types::{
9    AdapterStrategy, AntiBotProvider, AntiBotRequirement, BlockedRatioSlo, Detection,
10    HarRequestSummary, HostSummary, IntegrationRecommendation, InvestigationDiff,
11    InvestigationReport, MarkerCount, RequirementLevel, RequirementsProfile, TargetClass,
12    TransactionView,
13};
14
/// Build an investigation report from a HAR payload.
///
/// Every request is classified with the default [`classify_transaction`] classifier, and the
/// returned report has `target_class` set to `None`.
///
/// # Errors
///
/// Returns [`har::HarError`] when the HAR payload is invalid or malformed.
pub fn investigate_har(har_json: &str) -> Result<InvestigationReport, har::HarError> {
    // Delegate to the shared pipeline, passing the profile-less classifier as the strategy.
    investigate_har_with_classifier(har_json, classify_transaction)
}
23
/// Build an investigation report from a HAR payload using an explicit analyzer profile.
///
/// Each transaction is classified through [`classify_transaction_with_profile`] with the
/// supplied `profile`; everything else matches [`investigate_har`].
///
/// # Errors
///
/// Returns [`har::HarError`] when the HAR payload is invalid or malformed.
pub fn investigate_har_with_profile(
    har_json: &str,
    profile: &AnalyzerProfile,
) -> Result<InvestigationReport, har::HarError> {
    // The closure borrows `profile` so the shared pipeline stays agnostic of analyzer profiles.
    investigate_har_with_classifier(har_json, |tx| {
        classify_transaction_with_profile(tx, profile)
    })
}
37
38fn investigate_har_with_classifier<F>(
39    har_json: &str,
40    classify: F,
41) -> Result<InvestigationReport, har::HarError>
42where
43    F: Fn(&TransactionView) -> Detection,
44{
45    let parsed = har::parse_har_transactions(har_json)?;
46
47    let mut status_histogram: BTreeMap<u16, u64> = BTreeMap::new();
48    let mut resource_type_histogram: BTreeMap<String, u64> = BTreeMap::new();
49    let mut provider_histogram: BTreeMap<AntiBotProvider, u64> = BTreeMap::new();
50    let mut marker_histogram: BTreeMap<String, u64> = BTreeMap::new();
51    let mut host_accumulator: BTreeMap<String, HostSummary> = BTreeMap::new();
52
53    let mut blocked_requests = 0_u64;
54    let mut all_requests: Vec<HarRequestSummary> = Vec::new();
55    let mut suspicious_requests: Vec<HarRequestSummary> = Vec::new();
56
57    for req in parsed.requests {
58        let detection = classify(&req.transaction);
59
60        let summary = HarRequestSummary {
61            url: req.transaction.url.clone(),
62            status: req.transaction.status,
63            resource_type: req.resource_type.clone(),
64            detection,
65        };
66
67        let status_entry = status_histogram.entry(summary.status).or_insert(0);
68        *status_entry = status_entry.saturating_add(1);
69
70        let resource_label = summary
71            .resource_type
72            .clone()
73            .unwrap_or_else(|| "unknown".to_string());
74        let resource_entry = resource_type_histogram.entry(resource_label).or_insert(0);
75        *resource_entry = resource_entry.saturating_add(1);
76
77        let provider_entry = provider_histogram
78            .entry(summary.detection.provider)
79            .or_insert(0);
80        *provider_entry = provider_entry.saturating_add(1);
81
82        for marker in &summary.detection.markers {
83            let marker_entry = marker_histogram.entry(marker.clone()).or_insert(0);
84            *marker_entry = marker_entry.saturating_add(1);
85        }
86
87        let is_blocked = summary.status == 403 || summary.status == 429;
88        if is_blocked {
89            blocked_requests = blocked_requests.saturating_add(1);
90        }
91
92        let host = extract_host(&summary.url);
93        let host_summary = host_accumulator.entry(host.clone()).or_insert(HostSummary {
94            host,
95            total_requests: 0,
96            blocked_requests: 0,
97        });
98        host_summary.total_requests = host_summary.total_requests.saturating_add(1);
99        if is_blocked {
100            host_summary.blocked_requests = host_summary.blocked_requests.saturating_add(1);
101        }
102
103        let is_suspicious = is_blocked || summary.detection.provider != AntiBotProvider::Unknown;
104        if is_suspicious {
105            suspicious_requests.push(summary.clone());
106        }
107
108        all_requests.push(summary);
109    }
110
111    let total_requests = u64::try_from(all_requests.len()).unwrap_or(u64::MAX);
112
113    let aggregate = aggregate_detection(&all_requests);
114
115    let mut top_markers = marker_histogram
116        .iter()
117        .map(|(marker, count)| MarkerCount {
118            marker: marker.clone(),
119            count: *count,
120        })
121        .collect::<Vec<_>>();
122    top_markers.sort_by_key(|marker| std::cmp::Reverse(marker.count));
123    if top_markers.len() > 25 {
124        top_markers.truncate(25);
125    }
126
127    let mut hosts = host_accumulator.into_values().collect::<Vec<_>>();
128    hosts.sort_by_key(|host| std::cmp::Reverse(host.total_requests));
129
130    suspicious_requests.sort_by_key(|req| std::cmp::Reverse(req.status));
131    if suspicious_requests.len() > 200 {
132        suspicious_requests.truncate(200);
133    }
134
135    Ok(InvestigationReport {
136        page_title: parsed.page_title,
137        total_requests,
138        blocked_requests,
139        status_histogram,
140        resource_type_histogram,
141        provider_histogram,
142        marker_histogram,
143        top_markers,
144        hosts,
145        suspicious_requests,
146        aggregate,
147        target_class: None,
148    })
149}
150
/// Build an investigation report from a HAR payload, consulting an external cache first.
///
/// The cache key is derived from both the payload and `target_class`. On a hit the cached
/// report is returned as-is; on a miss the report is built with [`investigate_har`], tagged
/// with `target_class`, stored in the cache, and returned.
///
/// # Errors
///
/// Returns [`har::HarError`] when the HAR payload is invalid or malformed.
#[cfg(feature = "caching")]
pub fn investigate_har_cached_with_target_class(
    har_json: &str,
    target_class: TargetClass,
    cache: &dyn InvestigationReportCache,
) -> Result<InvestigationReport, har::HarError> {
    let key = investigation_cache_key(har_json, target_class);
    let cached = cache.get(&key);

    match cached {
        Some(hit) => Ok(hit),
        None => {
            // Failed investigations propagate here and are never cached.
            let fresh = InvestigationReport {
                target_class: Some(target_class),
                ..investigate_har(har_json)?
            };
            cache.put(key, fresh.clone());
            Ok(fresh)
        }
    }
}
172
/// Build an investigation report from a HAR payload using a cache and the conservative Unknown class.
///
/// Equivalent to calling [`investigate_har_cached_with_target_class`] with
/// `TargetClass::Unknown`, so the cache key is derived with that class.
///
/// # Errors
///
/// Returns [`har::HarError`] when the HAR payload is invalid or malformed.
#[cfg(feature = "caching")]
pub fn investigate_har_cached(
    har_json: &str,
    cache: &dyn InvestigationReportCache,
) -> Result<InvestigationReport, har::HarError> {
    // No class was supplied by the caller, so fall back to `Unknown`.
    investigate_har_cached_with_target_class(har_json, TargetClass::Unknown, cache)
}
185
186/// Compare a baseline and candidate investigation report.
187#[must_use]
188pub fn compare_reports(
189    baseline: &InvestigationReport,
190    candidate: &InvestigationReport,
191) -> InvestigationDiff {
192    let baseline_ratio = blocked_ratio(baseline.blocked_requests, baseline.total_requests);
193    let candidate_ratio = blocked_ratio(candidate.blocked_requests, candidate.total_requests);
194    let blocked_ratio_delta = candidate_ratio - baseline_ratio;
195
196    let mut provider_delta: BTreeMap<AntiBotProvider, i64> = BTreeMap::new();
197    let all_providers =
198        collect_provider_keys(&baseline.provider_histogram, &candidate.provider_histogram);
199    for provider in all_providers {
200        let base = baseline
201            .provider_histogram
202            .get(&provider)
203            .copied()
204            .unwrap_or(0);
205        let cand = candidate
206            .provider_histogram
207            .get(&provider)
208            .copied()
209            .unwrap_or(0);
210
211        let cand_i64 = i64::try_from(cand).unwrap_or(i64::MAX);
212        let base_i64 = i64::try_from(base).unwrap_or(i64::MAX);
213
214        let _ = provider_delta.insert(provider, cand_i64.saturating_sub(base_i64));
215    }
216
217    let baseline_markers = baseline
218        .marker_histogram
219        .keys()
220        .cloned()
221        .collect::<BTreeSet<_>>();
222    let candidate_markers = candidate
223        .marker_histogram
224        .keys()
225        .cloned()
226        .collect::<BTreeSet<_>>();
227    let new_markers = candidate_markers
228        .difference(&baseline_markers)
229        .cloned()
230        .collect::<Vec<_>>();
231
232    InvestigationDiff {
233        baseline_total_requests: baseline.total_requests,
234        candidate_total_requests: candidate.total_requests,
235        baseline_blocked_requests: baseline.blocked_requests,
236        candidate_blocked_requests: candidate.blocked_requests,
237        blocked_ratio_delta,
238        likely_regression: blocked_ratio_delta >= 0.02,
239        provider_delta,
240        new_markers,
241    }
242}
243
244/// Infer operational requirements from an investigation report using explicit SLO thresholds.
245///
246/// Uses the provided `target_class` to determine acceptable block ratios and applies SLO-aware
247/// assessment to requirement inference.
248///
249/// # Arguments
250///
251/// * `report` — Investigation report with metrics and provider signatures
252/// * `target_class` — Website classification for SLO thresholds (`Api`, `ContentSite`, `HighSecurity`, `Unknown`)
253///
254/// # Returns
255///
256/// Requirements profile incorporating SLO-based assessment for adaptive rate requirements.
257#[must_use]
258#[allow(clippy::too_many_lines)]
259pub fn infer_requirements_with_target_class(
260    report: &InvestigationReport,
261    target_class: TargetClass,
262) -> RequirementsProfile {
263    let mut requirements = Vec::new();
264
265    let blocked_ratio = blocked_ratio(report.blocked_requests, report.total_requests);
266    let marker_set = report
267        .top_markers
268        .iter()
269        .map(|marker| marker.marker.to_lowercase())
270        .collect::<BTreeSet<_>>();
271
272    let has_cloudflare = marker_set.iter().any(|m| {
273        m.contains("cf-ray") || m.contains("__cf_bm") || m.contains("cdn-cgi/challenge-platform")
274    });
275    let has_datadome = marker_set.iter().any(|m| {
276        m.contains("x-datadome")
277            || m.contains("x-dd-b")
278            || m.contains("datadome=")
279            || m.contains("captcha-delivery.com")
280    });
281
282    if has_cloudflare {
283        requirements.push(AntiBotRequirement {
284            id: "js_runtime_and_cookie_lifecycle".to_string(),
285            title: "Maintain JS-capable session flow".to_string(),
286            why: "Challenge markers indicate server-side scoring that expects browser-like session progression.".to_string(),
287            evidence: select_marker_evidence(&marker_set, &["cf-ray", "__cf_bm", "cdn-cgi/challenge-platform"]),
288            level: RequirementLevel::High,
289        });
290    }
291
292    if has_datadome {
293        requirements.push(AntiBotRequirement {
294            id: "fingerprint_and_identity_consistency".to_string(),
295            title: "Keep request identity consistent".to_string(),
296            why: "DataDome markers commonly correlate with strict consistency checks across headers, cookies, and connection profile.".to_string(),
297            evidence: select_marker_evidence(&marker_set, &["x-datadome", "x-dd-b", "datadome=", "captcha-delivery.com"]),
298            level: RequirementLevel::High,
299        });
300    }
301
302    // Use SLO framework for adaptive rate requirement based on target class
303    let slo = BlockedRatioSlo::for_class(target_class);
304    let (_acceptable, warning, critical) = slo.assess(blocked_ratio);
305
306    if warning || critical {
307        let level = if critical {
308            RequirementLevel::High
309        } else {
310            RequirementLevel::Medium
311        };
312        let why = if critical {
313            format!(
314                "Block ratio {:.1}% exceeds critical SLO threshold ({:.1}%) for {:?}",
315                blocked_ratio * 100.0,
316                slo.critical * 100.0,
317                target_class
318            )
319        } else {
320            format!(
321                "Block ratio {:.1}% exceeds warning SLO threshold ({:.1}%) for {:?}",
322                blocked_ratio * 100.0,
323                slo.warning * 100.0,
324                target_class
325            )
326        };
327
328        requirements.push(AntiBotRequirement {
329            id: "adaptive_rate_and_retry_budget".to_string(),
330            title: "Apply adaptive pacing and bounded retries".to_string(),
331            why,
332            evidence: vec![format!(
333                "blocked_ratio={blocked_ratio:.4}, slo_acceptable={:.4}",
334                slo.acceptable
335            )],
336            level,
337        });
338    }
339
340    let status_429 = report.status_histogram.get(&429).copied().unwrap_or(0);
341    if status_429 > 0 {
342        requirements.push(AntiBotRequirement {
343            id: "rate_limit_backoff".to_string(),
344            title: "Honor explicit rate limits".to_string(),
345            why: "Observed HTTP 429 responses indicate throttling pressure.".to_string(),
346            evidence: vec![format!("status_429={status_429}")],
347            level: RequirementLevel::Medium,
348        });
349    }
350
351    let preflight_count = report
352        .resource_type_histogram
353        .get("preflight")
354        .copied()
355        .unwrap_or(0);
356    if preflight_count > 0 {
357        requirements.push(AntiBotRequirement {
358            id: "cors_and_header_fidelity".to_string(),
359            title: "Preserve browser-like CORS/header flow".to_string(),
360            why: "Preflight-heavy traffic can fail if adapter behavior diverges from browser request choreography.".to_string(),
361            evidence: vec![format!("preflight_requests={preflight_count}")],
362            level: RequirementLevel::Medium,
363        });
364    }
365
366    let recommendation = recommend_strategy(
367        report.aggregate.provider,
368        blocked_ratio,
369        has_cloudflare,
370        has_datadome,
371        &requirements,
372    );
373
374    RequirementsProfile {
375        provider: report.aggregate.provider,
376        confidence: report.aggregate.confidence,
377        requirements,
378        recommendation,
379    }
380}
381
/// Infer operational requirements and adapter strategy from an investigation report.
///
/// Uses the `target_class` from the report if available; otherwise defaults to `Unknown`.
/// For explicit SLO control, use [`infer_requirements_with_target_class`] instead.
#[must_use]
pub fn infer_requirements(report: &InvestigationReport) -> RequirementsProfile {
    // `investigate_har` leaves `target_class` unset, so `Unknown` is the fallback for it.
    let target_class = report.target_class.unwrap_or(TargetClass::Unknown);
    infer_requirements_with_target_class(report, target_class)
}
391
392fn aggregate_detection(requests: &[HarRequestSummary]) -> Detection {
393    let mut provider_counts: BTreeMap<AntiBotProvider, u64> = BTreeMap::new();
394    let mut markers: Vec<String> = Vec::new();
395
396    for req in requests {
397        if req.detection.provider != AntiBotProvider::Unknown {
398            let entry = provider_counts.entry(req.detection.provider).or_insert(0);
399            *entry = entry.saturating_add(1);
400        }
401        markers.extend(req.detection.markers.iter().cloned());
402    }
403
404    if provider_counts.is_empty() {
405        return Detection {
406            provider: AntiBotProvider::Unknown,
407            confidence: 0.0,
408            markers: Vec::new(),
409        };
410    }
411
412    let mut ordered = provider_counts.into_iter().collect::<Vec<_>>();
413    ordered.sort_by_key(|(_, count)| std::cmp::Reverse(*count));
414
415    if let Some((provider, top_count)) = ordered.first().copied() {
416        let second_count = ordered.get(1).map_or(0, |pair| pair.1);
417        let confidence = if top_count + second_count == 0 {
418            0.0
419        } else {
420            to_f64(top_count) / to_f64(top_count + second_count)
421        };
422
423        Detection {
424            provider,
425            confidence,
426            markers,
427        }
428    } else {
429        Detection {
430            provider: AntiBotProvider::Unknown,
431            confidence: 0.0,
432            markers,
433        }
434    }
435}
436
437fn blocked_ratio(blocked: u64, total: u64) -> f64 {
438    if total == 0 {
439        0.0
440    } else {
441        to_f64(blocked) / to_f64(total)
442    }
443}
444
/// Convert a `u64` counter to `f64` for ratio arithmetic.
///
/// The `as` cast rounds values that `f64` cannot represent exactly, which is why the
/// `cast_precision_loss` lint is allowed here.
#[allow(clippy::cast_precision_loss)]
const fn to_f64(value: u64) -> f64 {
    value as f64
}
449
450fn collect_provider_keys(
451    left: &BTreeMap<AntiBotProvider, u64>,
452    right: &BTreeMap<AntiBotProvider, u64>,
453) -> BTreeSet<AntiBotProvider> {
454    left.keys().chain(right.keys()).copied().collect()
455}
456
/// Extract the host portion (including any port) from a URL-like string.
///
/// For inputs with a scheme (`scheme://…`), the authority is taken up to the first `/`, `?`,
/// or `#`, and any `userinfo@` prefix is removed. Inputs without a scheme are treated as
/// starting directly at the authority and are cut at the same delimiters; no userinfo
/// stripping is applied to them.
///
/// Stopping at `?` and `#` as well as `/` matters for URLs without a path: otherwise a query
/// such as `?email=a@b.com` would leak into the host and the `@` inside it would be mistaken
/// for a userinfo separator.
fn extract_host(url: &str) -> String {
    /// Characters that terminate the authority component of a URL.
    const AUTHORITY_END: [char; 3] = ['/', '?', '#'];

    /// Everything before the first path, query, or fragment delimiter.
    fn authority(rest: &str) -> &str {
        rest.split(AUTHORITY_END).next().unwrap_or(rest)
    }

    match url.split_once("://") {
        // A real scheme never contains `/`, `?`, or `#`; if the text before `://` does, the
        // separator belongs to a later component (e.g. a URL embedded in a query string).
        Some((scheme, rest)) if !scheme.contains(AUTHORITY_END) => {
            let authority = authority(rest);
            // Userinfo ends at the last `@` inside the authority.
            authority
                .rsplit_once('@')
                .map_or(authority, |(_, host)| host)
                .to_string()
        }
        _ => authority(url).to_string(),
    }
}
466
/// Return, in set order, every marker that contains at least one of the `needles`.
fn select_marker_evidence(marker_set: &BTreeSet<String>, needles: &[&str]) -> Vec<String> {
    marker_set
        .iter()
        .filter(|marker| needles.iter().any(|needle| marker.contains(*needle)))
        .cloned()
        .collect()
}
476
477fn recommend_strategy(
478    provider: AntiBotProvider,
479    blocked_ratio: f64,
480    has_cloudflare: bool,
481    has_datadome: bool,
482    requirements: &[AntiBotRequirement],
483) -> IntegrationRecommendation {
484    let mut required_stygian_features = Vec::new();
485    let mut config_hints = BTreeMap::new();
486
487    let strategy = if has_datadome {
488        required_stygian_features.push("stygian-browser".to_string());
489        required_stygian_features.push("stygian-proxy".to_string());
490        let _ = config_hints.insert("proxy.rotation".to_string(), "per-domain".to_string());
491        let _ = config_hints.insert("session.sticky_ttl_secs".to_string(), "600".to_string());
492        let _ = config_hints.insert(
493            "webrtc.policy".to_string(),
494            "disable_non_proxied_udp".to_string(),
495        );
496        AdapterStrategy::StickyProxy
497    } else if has_cloudflare || blocked_ratio >= 0.05 {
498        required_stygian_features.push("stygian-browser".to_string());
499        let _ = config_hints.insert("request.rate_limit.rps".to_string(), "1-3".to_string());
500        let _ = config_hints.insert(
501            "retry.backoff".to_string(),
502            "exponential+jitter".to_string(),
503        );
504        AdapterStrategy::BrowserStealth
505    } else if provider == AntiBotProvider::Unknown && requirements.is_empty() {
506        required_stygian_features.push("stygian-graph".to_string());
507        AdapterStrategy::DirectHttp
508    } else {
509        required_stygian_features.push("stygian-graph".to_string());
510        required_stygian_features.push("stygian-charon".to_string());
511        AdapterStrategy::InvestigateOnly
512    };
513
514    let rationale = match strategy {
515        AdapterStrategy::StickyProxy => {
516            "Provider markers suggest identity/session continuity and proxy stickiness are primary requirements."
517                .to_string()
518        }
519        AdapterStrategy::BrowserStealth => {
520            "Challenge density indicates browser-backed execution with conservative pacing is required."
521                .to_string()
522        }
523        AdapterStrategy::DirectHttp => {
524            "No strong anti-bot markers were detected; direct HTTP path appears sufficient."
525                .to_string()
526        }
527        AdapterStrategy::SessionWarmup => {
528            "Session priming is recommended before collection workloads."
529                .to_string()
530        }
531        AdapterStrategy::InvestigateOnly => {
532            "Signals are mixed; keep adaptive telemetry enabled and gather additional baseline runs."
533                .to_string()
534        }
535    };
536
537    IntegrationRecommendation {
538        strategy,
539        rationale,
540        required_stygian_features,
541        config_hints,
542    }
543}
544
// Unit tests for report comparison, requirement inference, and (behind the `caching`
// feature) cached investigation. Reports are built by hand so each test controls exactly
// which counters and markers the code under test sees.
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;

    #[cfg(feature = "caching")]
    use std::{num::NonZeroUsize, time::Duration};

    #[cfg(feature = "caching")]
    use crate::cache::MemoryInvestigationCache;

    // Blocked ratio rising from 5/100 to 12/100 is a 0.07 increase, above the 0.02 cut-off.
    #[test]
    fn compare_reports_flags_block_ratio_regression() {
        let baseline = InvestigationReport {
            page_title: None,
            total_requests: 100,
            blocked_requests: 5,
            status_histogram: BTreeMap::new(),
            resource_type_histogram: BTreeMap::new(),
            provider_histogram: BTreeMap::new(),
            marker_histogram: BTreeMap::new(),
            top_markers: Vec::new(),
            hosts: Vec::new(),
            suspicious_requests: Vec::new(),
            aggregate: Detection {
                provider: AntiBotProvider::Unknown,
                confidence: 0.0,
                markers: Vec::new(),
            },
            target_class: None,
        };

        // Same report with only the blocked count raised.
        let candidate = InvestigationReport {
            blocked_requests: 12,
            ..baseline.clone()
        };

        let diff = compare_reports(&baseline, &candidate);
        assert!(diff.blocked_ratio_delta > 0.02);
        assert!(diff.likely_regression);
    }

    // Cloudflare markers without DataDome markers should yield the browser-stealth strategy.
    #[test]
    fn infer_requirements_identifies_cloudflare_signals() {
        let mut status_histogram = BTreeMap::new();
        let _ = status_histogram.insert(403, 7);

        let mut resource_histogram = BTreeMap::new();
        let _ = resource_histogram.insert("document".to_string(), 10);

        let report = InvestigationReport {
            page_title: Some("https://example.com".to_string()),
            total_requests: 10,
            blocked_requests: 7,
            status_histogram,
            resource_type_histogram: resource_histogram,
            provider_histogram: BTreeMap::new(),
            marker_histogram: BTreeMap::from([
                ("cf-ray".to_string(), 5),
                ("__cf_bm".to_string(), 5),
            ]),
            top_markers: vec![
                MarkerCount {
                    marker: "cf-ray".to_string(),
                    count: 5,
                },
                MarkerCount {
                    marker: "__cf_bm".to_string(),
                    count: 5,
                },
            ],
            hosts: Vec::new(),
            suspicious_requests: Vec::new(),
            aggregate: Detection {
                provider: AntiBotProvider::Cloudflare,
                confidence: 0.9,
                markers: vec!["cf-ray".to_string()],
            },
            target_class: None,
        };

        let profile = infer_requirements(&report);
        assert_eq!(profile.provider, AntiBotProvider::Cloudflare);
        assert!(!profile.requirements.is_empty());
        assert_eq!(
            profile.recommendation.strategy,
            AdapterStrategy::BrowserStealth
        );
    }

    // `infer_requirements` must pick up the target class stored on the report itself.
    #[test]
    fn infer_requirements_applies_slo_for_api_target() {
        // 20% blocked ratio is critical for API targets (critical at 15%)
        let mut status_histogram = BTreeMap::new();
        let _ = status_histogram.insert(403, 2);
        let _ = status_histogram.insert(429, 3);

        let report = InvestigationReport {
            page_title: None,
            total_requests: 25,
            blocked_requests: 5,
            status_histogram,
            resource_type_histogram: BTreeMap::new(),
            provider_histogram: BTreeMap::new(),
            marker_histogram: BTreeMap::new(),
            top_markers: Vec::new(),
            hosts: Vec::new(),
            suspicious_requests: Vec::new(),
            aggregate: Detection {
                provider: AntiBotProvider::Unknown,
                confidence: 0.0,
                markers: Vec::new(),
            },
            target_class: Some(TargetClass::Api),
        };

        let profile = infer_requirements(&report);

        // Should find adaptive_rate_and_retry_budget requirement with High level (critical)
        let adaptive_req = profile
            .requirements
            .iter()
            .find(|r| r.id == "adaptive_rate_and_retry_budget");
        assert!(adaptive_req.is_some());
        if let Some(req) = adaptive_req {
            assert_eq!(req.level, RequirementLevel::High);
        }
    }

    // The same report is assessed against three target classes to show the SLO dependence.
    #[test]
    fn infer_requirements_with_target_class_respects_slo_thresholds() {
        // 20% blocked ratio assessment differs by target class
        let report = InvestigationReport {
            page_title: None,
            total_requests: 100,
            blocked_requests: 20,
            status_histogram: BTreeMap::from([(403, 20)]),
            resource_type_histogram: BTreeMap::new(),
            provider_histogram: BTreeMap::new(),
            marker_histogram: BTreeMap::new(),
            top_markers: Vec::new(),
            hosts: Vec::new(),
            suspicious_requests: Vec::new(),
            aggregate: Detection {
                provider: AntiBotProvider::Unknown,
                confidence: 0.0,
                markers: Vec::new(),
            },
            target_class: None,
        };

        // For API: 20% is critical (threshold 15%)
        let api_profile = infer_requirements_with_target_class(&report, TargetClass::Api);
        let api_req = api_profile
            .requirements
            .iter()
            .find(|r| r.id == "adaptive_rate_and_retry_budget");
        assert!(api_req.is_some());
        if let Some(req) = api_req {
            assert_eq!(req.level, RequirementLevel::High); // Critical
        }

        // For ContentSite: 20% is in warning zone (acceptable 15%, warning 25%)
        let content_profile =
            infer_requirements_with_target_class(&report, TargetClass::ContentSite);
        let content_req = content_profile
            .requirements
            .iter()
            .find(|r| r.id == "adaptive_rate_and_retry_budget");
        assert!(content_req.is_some());
        if let Some(req) = content_req {
            assert_eq!(req.level, RequirementLevel::Medium); // Warning
        }

        // For HighSecurity: 20% is acceptable (threshold 30%)
        let high_sec_profile =
            infer_requirements_with_target_class(&report, TargetClass::HighSecurity);
        let high_sec_req = high_sec_profile
            .requirements
            .iter()
            .find(|r| r.id == "adaptive_rate_and_retry_budget");
        assert!(high_sec_req.is_none()); // Below acceptable, no requirement
    }

    // Negative case: a ratio inside the SLO must not produce the adaptive-rate requirement.
    #[test]
    fn infer_requirements_below_slo_has_no_adaptive_requirement() {
        // 5% blocked ratio is acceptable for API targets
        let report = InvestigationReport {
            page_title: None,
            total_requests: 100,
            blocked_requests: 5,
            status_histogram: BTreeMap::from([(403, 5)]),
            resource_type_histogram: BTreeMap::new(),
            provider_histogram: BTreeMap::new(),
            marker_histogram: BTreeMap::new(),
            top_markers: Vec::new(),
            hosts: Vec::new(),
            suspicious_requests: Vec::new(),
            aggregate: Detection {
                provider: AntiBotProvider::Unknown,
                confidence: 0.0,
                markers: Vec::new(),
            },
            target_class: None,
        };

        let profile = infer_requirements_with_target_class(&report, TargetClass::Api);

        // Should NOT find adaptive_rate_and_retry_budget requirement (acceptable for API)
        let adaptive_req = profile
            .requirements
            .iter()
            .find(|r| r.id == "adaptive_rate_and_retry_budget");
        assert!(adaptive_req.is_none());
    }

    // Runs the same payload and class through the cache twice and expects equal reports.
    #[cfg(feature = "caching")]
    #[test]
    fn cached_investigation_sets_target_class_and_reuses_cached_report() {
        let capacity = NonZeroUsize::new(8).unwrap_or(NonZeroUsize::MIN);
        let cache = MemoryInvestigationCache::new(capacity, Duration::from_mins(1));
        // Minimal HAR document with one page and no entries.
        let har_json = r#"{
            "log": {
                "version": "1.2.0",
                "creator": {"name": "test", "version": "1.0"},
                "pages": [{"id": "page1", "title": "test", "startedDateTime": "2025-01-01T00:00:00Z", "pageTimings": {"onLoad": 0}}],
                "entries": []
            }
        }"#;

        let first_result =
            investigate_har_cached_with_target_class(har_json, TargetClass::Api, &cache);
        assert!(first_result.is_ok(), "cached investigation should succeed");
        let second_result =
            investigate_har_cached_with_target_class(har_json, TargetClass::Api, &cache);
        assert!(
            second_result.is_ok(),
            "cached investigation should hit cache"
        );

        // The asserts above already failed on `Err`; `let else` just unwraps without panicking.
        let Ok(first) = first_result else {
            return;
        };
        let Ok(second) = second_result else {
            return;
        };

        assert_eq!(first.target_class, Some(TargetClass::Api));
        assert_eq!(second.target_class, Some(TargetClass::Api));
        assert_eq!(first, second);
    }
}