Skip to main content

stygian_charon/
investigation.rs

1use std::collections::{BTreeMap, BTreeSet};
2
3use crate::analyzer::AnalyzerProfile;
4#[cfg(feature = "caching")]
5use crate::cache::{InvestigationReportCache, investigation_cache_key};
6use crate::classifier::{classify_transaction, classify_transaction_with_profile};
7use crate::har;
8use crate::types::{
9    AdapterStrategy, AntiBotProvider, AntiBotRequirement, BlockedRatioSlo, Detection,
10    HarRequestSummary, HostSummary, IntegrationRecommendation, InvestigationDiff,
11    InvestigationReport, MarkerCount, RequirementLevel, RequirementsProfile, TargetClass,
12    TransactionView,
13};
14
/// Build an investigation report from a HAR payload.
///
/// Every transaction is classified with the default [`classify_transaction`]
/// classifier. The returned report has `target_class` set to `None`.
///
/// # Errors
///
/// Returns [`har::HarError`] when the HAR payload is invalid or malformed.
pub fn investigate_har(har_json: &str) -> Result<InvestigationReport, har::HarError> {
    investigate_har_with_classifier(har_json, classify_transaction)
}
23
/// Build an investigation report from a HAR payload using an explicit analyzer profile.
///
/// Behaves like [`investigate_har`], except that each transaction is classified
/// through [`classify_transaction_with_profile`] with the supplied `profile`.
///
/// # Errors
///
/// Returns [`har::HarError`] when the HAR payload is invalid or malformed.
pub fn investigate_har_with_profile(
    har_json: &str,
    profile: &AnalyzerProfile,
) -> Result<InvestigationReport, har::HarError> {
    // The closure borrows `profile` so the shared driver only sees a plain classifier.
    investigate_har_with_classifier(har_json, |tx| {
        classify_transaction_with_profile(tx, profile)
    })
}
37
38fn investigate_har_with_classifier<F>(
39    har_json: &str,
40    classify: F,
41) -> Result<InvestigationReport, har::HarError>
42where
43    F: Fn(&TransactionView) -> Detection,
44{
45    let parsed = har::parse_har_transactions(har_json)?;
46
47    let mut status_histogram: BTreeMap<u16, u64> = BTreeMap::new();
48    let mut resource_type_histogram: BTreeMap<String, u64> = BTreeMap::new();
49    let mut provider_histogram: BTreeMap<AntiBotProvider, u64> = BTreeMap::new();
50    let mut marker_histogram: BTreeMap<String, u64> = BTreeMap::new();
51    let mut host_accumulator: BTreeMap<String, HostSummary> = BTreeMap::new();
52
53    let mut blocked_requests = 0_u64;
54    let mut all_requests: Vec<HarRequestSummary> = Vec::new();
55    let mut suspicious_requests: Vec<HarRequestSummary> = Vec::new();
56
57    for req in parsed.requests {
58        let detection = classify(&req.transaction);
59
60        let summary = HarRequestSummary {
61            url: req.transaction.url.clone(),
62            status: req.transaction.status,
63            resource_type: req.resource_type.clone(),
64            detection,
65        };
66
67        let status_entry = status_histogram.entry(summary.status).or_insert(0);
68        *status_entry = status_entry.saturating_add(1);
69
70        let resource_label = summary
71            .resource_type
72            .clone()
73            .unwrap_or_else(|| "unknown".to_string());
74        let resource_entry = resource_type_histogram.entry(resource_label).or_insert(0);
75        *resource_entry = resource_entry.saturating_add(1);
76
77        let provider_entry = provider_histogram
78            .entry(summary.detection.provider)
79            .or_insert(0);
80        *provider_entry = provider_entry.saturating_add(1);
81
82        for marker in &summary.detection.markers {
83            let marker_entry = marker_histogram.entry(marker.clone()).or_insert(0);
84            *marker_entry = marker_entry.saturating_add(1);
85        }
86
87        let is_blocked = summary.status == 403 || summary.status == 429;
88        if is_blocked {
89            blocked_requests = blocked_requests.saturating_add(1);
90        }
91
92        let host = extract_host(&summary.url);
93        let host_summary = host_accumulator.entry(host.clone()).or_insert(HostSummary {
94            host,
95            total_requests: 0,
96            blocked_requests: 0,
97        });
98        host_summary.total_requests = host_summary.total_requests.saturating_add(1);
99        if is_blocked {
100            host_summary.blocked_requests = host_summary.blocked_requests.saturating_add(1);
101        }
102
103        let is_suspicious = is_blocked || summary.detection.provider != AntiBotProvider::Unknown;
104        if is_suspicious {
105            suspicious_requests.push(summary.clone());
106        }
107
108        all_requests.push(summary);
109    }
110
111    let total_requests = u64::try_from(all_requests.len()).unwrap_or(u64::MAX);
112
113    let aggregate = aggregate_detection(&all_requests);
114
115    let mut top_markers = marker_histogram
116        .iter()
117        .map(|(marker, count)| MarkerCount {
118            marker: marker.clone(),
119            count: *count,
120        })
121        .collect::<Vec<_>>();
122    top_markers.sort_by_key(|marker| std::cmp::Reverse(marker.count));
123    if top_markers.len() > 25 {
124        top_markers.truncate(25);
125    }
126
127    let mut hosts = host_accumulator.into_values().collect::<Vec<_>>();
128    hosts.sort_by_key(|host| std::cmp::Reverse(host.total_requests));
129
130    suspicious_requests.sort_by_key(|req| std::cmp::Reverse(req.status));
131    if suspicious_requests.len() > 200 {
132        suspicious_requests.truncate(200);
133    }
134
135    Ok(InvestigationReport {
136        page_title: parsed.page_title,
137        total_requests,
138        blocked_requests,
139        status_histogram,
140        resource_type_histogram,
141        provider_histogram,
142        marker_histogram,
143        top_markers,
144        hosts,
145        suspicious_requests,
146        aggregate,
147        target_class: None,
148    })
149}
150
/// Build an investigation report from a HAR payload using an external cache and explicit target class.
///
/// The cache key is derived from both `har_json` and `target_class` via
/// `investigation_cache_key`. On a cache hit the stored report is returned
/// unchanged. On a miss the report is built with [`investigate_har`], stamped
/// with `target_class`, stored in the cache, and returned.
///
/// # Errors
///
/// Returns [`har::HarError`] when the HAR payload is invalid or malformed.
#[cfg(feature = "caching")]
pub fn investigate_har_cached_with_target_class(
    har_json: &str,
    target_class: TargetClass,
    cache: &dyn InvestigationReportCache,
) -> Result<InvestigationReport, har::HarError> {
    let key = investigation_cache_key(har_json, target_class);
    if let Some(report) = cache.get(&key) {
        return Ok(report);
    }

    // A failed investigation returns early here, so errors are never cached.
    let mut report = investigate_har(har_json)?;
    report.target_class = Some(target_class);
    cache.put(key, report.clone());
    Ok(report)
}
172
/// Build an investigation report from a HAR payload using a cache and the conservative Unknown class.
///
/// Shorthand for [`investigate_har_cached_with_target_class`] called with
/// [`TargetClass::Unknown`].
///
/// # Errors
///
/// Returns [`har::HarError`] when the HAR payload is invalid or malformed.
#[cfg(feature = "caching")]
pub fn investigate_har_cached(
    har_json: &str,
    cache: &dyn InvestigationReportCache,
) -> Result<InvestigationReport, har::HarError> {
    investigate_har_cached_with_target_class(har_json, TargetClass::Unknown, cache)
}
185
186/// Compare a baseline and candidate investigation report.
187#[must_use]
188pub fn compare_reports(
189    baseline: &InvestigationReport,
190    candidate: &InvestigationReport,
191) -> InvestigationDiff {
192    let baseline_ratio = blocked_ratio(baseline.blocked_requests, baseline.total_requests);
193    let candidate_ratio = blocked_ratio(candidate.blocked_requests, candidate.total_requests);
194    let blocked_ratio_delta = candidate_ratio - baseline_ratio;
195
196    let mut provider_delta: BTreeMap<AntiBotProvider, i64> = BTreeMap::new();
197    let all_providers =
198        collect_provider_keys(&baseline.provider_histogram, &candidate.provider_histogram);
199    for provider in all_providers {
200        let base = baseline
201            .provider_histogram
202            .get(&provider)
203            .copied()
204            .unwrap_or(0);
205        let cand = candidate
206            .provider_histogram
207            .get(&provider)
208            .copied()
209            .unwrap_or(0);
210
211        let cand_i64 = i64::try_from(cand).unwrap_or(i64::MAX);
212        let base_i64 = i64::try_from(base).unwrap_or(i64::MAX);
213
214        let _ = provider_delta.insert(provider, cand_i64.saturating_sub(base_i64));
215    }
216
217    let baseline_markers = baseline
218        .marker_histogram
219        .keys()
220        .cloned()
221        .collect::<BTreeSet<_>>();
222    let candidate_markers = candidate
223        .marker_histogram
224        .keys()
225        .cloned()
226        .collect::<BTreeSet<_>>();
227    let new_markers = candidate_markers
228        .difference(&baseline_markers)
229        .cloned()
230        .collect::<Vec<_>>();
231
232    InvestigationDiff {
233        baseline_total_requests: baseline.total_requests,
234        candidate_total_requests: candidate.total_requests,
235        baseline_blocked_requests: baseline.blocked_requests,
236        candidate_blocked_requests: candidate.blocked_requests,
237        blocked_ratio_delta,
238        likely_regression: blocked_ratio_delta >= 0.02,
239        provider_delta,
240        new_markers,
241    }
242}
243
244/// Infer operational requirements from an investigation report using explicit SLO thresholds.
245///
246/// Uses the provided `target_class` to determine acceptable block ratios and applies SLO-aware
247/// assessment to requirement inference.
248///
249/// # Arguments
250///
251/// * `report` — Investigation report with metrics and provider signatures
252/// * `target_class` — Website classification for SLO thresholds (`Api`, `ContentSite`, `HighSecurity`, `Unknown`)
253///
254/// # Returns
255///
256/// Requirements profile incorporating SLO-based assessment for adaptive rate requirements.
257#[must_use]
258#[allow(clippy::too_many_lines)]
259pub fn infer_requirements_with_target_class(
260    report: &InvestigationReport,
261    target_class: TargetClass,
262) -> RequirementsProfile {
263    let mut requirements = Vec::new();
264
265    let blocked_ratio = blocked_ratio(report.blocked_requests, report.total_requests);
266    let marker_set = report
267        .top_markers
268        .iter()
269        .map(|marker| marker.marker.to_lowercase())
270        .collect::<BTreeSet<_>>();
271
272    let has_cloudflare = marker_set.iter().any(|m| {
273        m.contains("cf-ray") || m.contains("__cf_bm") || m.contains("cdn-cgi/challenge-platform")
274    });
275    let has_datadome = marker_set.iter().any(|m| {
276        m.contains("x-datadome")
277            || m.contains("x-dd-b")
278            || m.contains("datadome=")
279            || m.contains("captcha-delivery.com")
280    });
281
282    if has_cloudflare {
283        requirements.push(AntiBotRequirement {
284            id: "js_runtime_and_cookie_lifecycle".to_string(),
285            title: "Maintain JS-capable session flow".to_string(),
286            why: "Challenge markers indicate server-side scoring that expects browser-like session progression.".to_string(),
287            evidence: select_marker_evidence(&marker_set, &["cf-ray", "__cf_bm", "cdn-cgi/challenge-platform"]),
288            level: RequirementLevel::High,
289        });
290    }
291
292    if has_datadome {
293        requirements.push(AntiBotRequirement {
294            id: "fingerprint_and_identity_consistency".to_string(),
295            title: "Keep request identity consistent".to_string(),
296            why: "DataDome markers commonly correlate with strict consistency checks across headers, cookies, and connection profile.".to_string(),
297            evidence: select_marker_evidence(&marker_set, &["x-datadome", "x-dd-b", "datadome=", "captcha-delivery.com"]),
298            level: RequirementLevel::High,
299        });
300    }
301
302    // Use SLO framework for adaptive rate requirement based on target class
303    let slo = BlockedRatioSlo::for_class(target_class);
304    let (_acceptable, warning, critical) = slo.assess(blocked_ratio);
305
306    if warning || critical {
307        let level = if critical {
308            RequirementLevel::High
309        } else {
310            RequirementLevel::Medium
311        };
312        let why = if critical {
313            format!(
314                "Block ratio {:.1}% exceeds critical SLO threshold ({:.1}%) for {:?}",
315                blocked_ratio * 100.0,
316                slo.critical * 100.0,
317                target_class
318            )
319        } else {
320            format!(
321                "Block ratio {:.1}% exceeds warning SLO threshold ({:.1}%) for {:?}",
322                blocked_ratio * 100.0,
323                slo.warning * 100.0,
324                target_class
325            )
326        };
327
328        requirements.push(AntiBotRequirement {
329            id: "adaptive_rate_and_retry_budget".to_string(),
330            title: "Apply adaptive pacing and bounded retries".to_string(),
331            why,
332            evidence: vec![format!(
333                "blocked_ratio={blocked_ratio:.4}, slo_acceptable={:.4}",
334                slo.acceptable
335            )],
336            level,
337        });
338    }
339
340    let status_429 = report.status_histogram.get(&429).copied().unwrap_or(0);
341    if status_429 > 0 {
342        requirements.push(AntiBotRequirement {
343            id: "rate_limit_backoff".to_string(),
344            title: "Honor explicit rate limits".to_string(),
345            why: "Observed HTTP 429 responses indicate throttling pressure.".to_string(),
346            evidence: vec![format!("status_429={status_429}")],
347            level: RequirementLevel::Medium,
348        });
349    }
350
351    let preflight_count = report
352        .resource_type_histogram
353        .get("preflight")
354        .copied()
355        .unwrap_or(0);
356    if preflight_count > 0 {
357        requirements.push(AntiBotRequirement {
358            id: "cors_and_header_fidelity".to_string(),
359            title: "Preserve browser-like CORS/header flow".to_string(),
360            why: "Preflight-heavy traffic can fail if adapter behavior diverges from browser request choreography.".to_string(),
361            evidence: vec![format!("preflight_requests={preflight_count}")],
362            level: RequirementLevel::Medium,
363        });
364    }
365
366    let recommendation = recommend_strategy(
367        report.aggregate.provider,
368        blocked_ratio,
369        has_cloudflare,
370        has_datadome,
371        &requirements,
372    );
373
374    RequirementsProfile {
375        provider: report.aggregate.provider,
376        confidence: report.aggregate.confidence,
377        requirements,
378        recommendation,
379    }
380}
381
/// Infer operational requirements and adapter strategy from an investigation report.
///
/// Uses the `target_class` from the report if available; otherwise defaults to `Unknown`.
/// For explicit SLO control, use [`infer_requirements_with_target_class`] instead.
#[must_use]
pub fn infer_requirements(report: &InvestigationReport) -> RequirementsProfile {
    // Reports built without a target class carry `None`; fall back to `Unknown`.
    let target_class = report.target_class.unwrap_or(TargetClass::Unknown);
    infer_requirements_with_target_class(report, target_class)
}
391
392fn aggregate_detection(requests: &[HarRequestSummary]) -> Detection {
393    let mut provider_counts: BTreeMap<AntiBotProvider, u64> = BTreeMap::new();
394    let mut markers: Vec<String> = Vec::new();
395
396    for req in requests {
397        if req.detection.provider != AntiBotProvider::Unknown {
398            let entry = provider_counts.entry(req.detection.provider).or_insert(0);
399            *entry = entry.saturating_add(1);
400        }
401        markers.extend(req.detection.markers.iter().cloned());
402    }
403
404    if provider_counts.is_empty() {
405        return Detection {
406            provider: AntiBotProvider::Unknown,
407            confidence: 0.0,
408            markers: Vec::new(),
409        };
410    }
411
412    let mut ordered = provider_counts.into_iter().collect::<Vec<_>>();
413    ordered.sort_by_key(|(_, count)| std::cmp::Reverse(*count));
414
415    if let Some((provider, top_count)) = ordered.first().copied() {
416        let second_count = ordered.get(1).map_or(0, |pair| pair.1);
417        let confidence = if top_count + second_count == 0 {
418            0.0
419        } else {
420            to_f64(top_count) / to_f64(top_count + second_count)
421        };
422
423        Detection {
424            provider,
425            confidence,
426            markers,
427        }
428    } else {
429        Detection {
430            provider: AntiBotProvider::Unknown,
431            confidence: 0.0,
432            markers,
433        }
434    }
435}
436
437fn blocked_ratio(blocked: u64, total: u64) -> f64 {
438    if total == 0 {
439        0.0
440    } else {
441        to_f64(blocked) / to_f64(total)
442    }
443}
444
/// Convert a counter to `f64` for ratio arithmetic.
///
/// The cast is lossy for values that `f64` cannot represent exactly, which is
/// why the `cast_precision_loss` lint is silenced here.
#[allow(clippy::cast_precision_loss)]
const fn to_f64(value: u64) -> f64 {
    value as f64
}
449
450fn collect_provider_keys(
451    left: &BTreeMap<AntiBotProvider, u64>,
452    right: &BTreeMap<AntiBotProvider, u64>,
453) -> BTreeSet<AntiBotProvider> {
454    left.keys().chain(right.keys()).copied().collect()
455}
456
/// Extract the host (including the port, when present) from a URL string.
///
/// For URLs with a `scheme://` prefix, the authority is everything after the
/// prefix up to the first `/`, `?`, or `#`, and any `userinfo@` prefix is
/// removed from it. Cutting at `?` and `#` as well as `/` matters for URLs with
/// no path: `https://example.com?email=a@b.com` must yield `example.com`, not
/// `b.com`.
///
/// Inputs without a `scheme://` prefix are cut at the first `/`, `?`, or `#`
/// and returned as-is, with no userinfo stripping.
fn extract_host(url: &str) -> String {
    // RFC 3986 §3.2: the authority ends at the next '/', '?', '#', or end of input.
    const AUTHORITY_END: [char; 3] = ['/', '?', '#'];

    match url.split_once("://") {
        Some((_, rest)) => {
            let authority = rest.split(AUTHORITY_END).next().unwrap_or(rest);
            // Everything up to the last '@' inside the authority is userinfo.
            authority
                .rsplit('@')
                .next()
                .unwrap_or(authority)
                .to_string()
        }
        None => url.split(AUTHORITY_END).next().unwrap_or(url).to_string(),
    }
}
466
/// Collect every marker that contains at least one of `needles` as a substring.
///
/// Matching is case-sensitive. Results follow the set's ascending order.
fn select_marker_evidence(marker_set: &BTreeSet<String>, needles: &[&str]) -> Vec<String> {
    marker_set
        .iter()
        .filter(|marker| needles.iter().any(|needle| marker.contains(needle)))
        .cloned()
        .collect()
}
476
477fn recommend_strategy(
478    provider: AntiBotProvider,
479    blocked_ratio: f64,
480    has_cloudflare: bool,
481    has_datadome: bool,
482    requirements: &[AntiBotRequirement],
483) -> IntegrationRecommendation {
484    let mut required_stygian_features = Vec::new();
485    let mut config_hints = BTreeMap::new();
486
487    let strategy = if has_datadome {
488        required_stygian_features.push("stygian-browser".to_string());
489        required_stygian_features.push("stygian-proxy".to_string());
490        let _ = config_hints.insert("proxy.rotation".to_string(), "per-domain".to_string());
491        let _ = config_hints.insert("session.sticky_ttl_secs".to_string(), "600".to_string());
492        let _ = config_hints.insert(
493            "webrtc.policy".to_string(),
494            "disable_non_proxied_udp".to_string(),
495        );
496        AdapterStrategy::StickyProxy
497    } else if has_cloudflare || blocked_ratio >= 0.05 {
498        required_stygian_features.push("stygian-browser".to_string());
499        let _ = config_hints.insert("request.rate_limit.rps".to_string(), "1-3".to_string());
500        let _ = config_hints.insert(
501            "retry.backoff".to_string(),
502            "exponential+jitter".to_string(),
503        );
504        AdapterStrategy::BrowserStealth
505    } else if provider == AntiBotProvider::Unknown && requirements.is_empty() {
506        required_stygian_features.push("stygian-graph".to_string());
507        AdapterStrategy::DirectHttp
508    } else {
509        required_stygian_features.push("stygian-graph".to_string());
510        required_stygian_features.push("stygian-charon".to_string());
511        AdapterStrategy::InvestigateOnly
512    };
513
514    let rationale = match strategy {
515        AdapterStrategy::StickyProxy => {
516            "Provider markers suggest identity/session continuity and proxy stickiness are primary requirements."
517                .to_string()
518        }
519        AdapterStrategy::BrowserStealth => {
520            "Challenge density indicates browser-backed execution with conservative pacing is required."
521                .to_string()
522        }
523        AdapterStrategy::DirectHttp => {
524            "No strong anti-bot markers were detected; direct HTTP path appears sufficient."
525                .to_string()
526        }
527        AdapterStrategy::SessionWarmup => {
528            "Session priming is recommended before collection workloads."
529                .to_string()
530        }
531        AdapterStrategy::InvestigateOnly => {
532            "Signals are mixed; keep adaptive telemetry enabled and gather additional baseline runs."
533                .to_string()
534        }
535    };
536
537    IntegrationRecommendation {
538        strategy,
539        rationale,
540        required_stygian_features,
541        config_hints,
542    }
543}
544
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg(feature = "caching")]
    use std::{num::NonZeroUsize, time::Duration};

    #[cfg(feature = "caching")]
    use crate::cache::MemoryInvestigationCache;

    /// Identifier of the SLO-driven pacing requirement.
    const ADAPTIVE_REQUIREMENT_ID: &str = "adaptive_rate_and_retry_budget";

    /// Report fixture with the given counters and no provider, marker, or host data.
    fn report_with_counts(
        total_requests: u64,
        blocked_requests: u64,
        status_histogram: BTreeMap<u16, u64>,
    ) -> InvestigationReport {
        InvestigationReport {
            page_title: None,
            total_requests,
            blocked_requests,
            status_histogram,
            resource_type_histogram: BTreeMap::new(),
            provider_histogram: BTreeMap::new(),
            marker_histogram: BTreeMap::new(),
            top_markers: Vec::new(),
            hosts: Vec::new(),
            suspicious_requests: Vec::new(),
            aggregate: Detection {
                provider: AntiBotProvider::Unknown,
                confidence: 0.0,
                markers: Vec::new(),
            },
            target_class: None,
        }
    }

    /// Level of the adaptive pacing requirement, or `None` when the profile lacks it.
    fn adaptive_level(profile: &RequirementsProfile) -> Option<&RequirementLevel> {
        profile
            .requirements
            .iter()
            .find(|req| req.id == ADAPTIVE_REQUIREMENT_ID)
            .map(|req| &req.level)
    }

    #[test]
    fn compare_reports_flags_block_ratio_regression() {
        let baseline = report_with_counts(100, 5, BTreeMap::new());
        let candidate = InvestigationReport {
            blocked_requests: 12,
            ..baseline.clone()
        };

        let diff = compare_reports(&baseline, &candidate);
        assert!(diff.blocked_ratio_delta > 0.02);
        assert!(diff.likely_regression);
    }

    #[test]
    fn infer_requirements_identifies_cloudflare_signals() {
        let report = InvestigationReport {
            page_title: Some("https://example.com".to_string()),
            resource_type_histogram: BTreeMap::from([("document".to_string(), 10)]),
            marker_histogram: BTreeMap::from([
                ("cf-ray".to_string(), 5),
                ("__cf_bm".to_string(), 5),
            ]),
            top_markers: vec![
                MarkerCount {
                    marker: "cf-ray".to_string(),
                    count: 5,
                },
                MarkerCount {
                    marker: "__cf_bm".to_string(),
                    count: 5,
                },
            ],
            aggregate: Detection {
                provider: AntiBotProvider::Cloudflare,
                confidence: 0.9,
                markers: vec!["cf-ray".to_string()],
            },
            ..report_with_counts(10, 7, BTreeMap::from([(403, 7)]))
        };

        let profile = infer_requirements(&report);
        assert_eq!(profile.provider, AntiBotProvider::Cloudflare);
        assert!(!profile.requirements.is_empty());
        assert_eq!(
            profile.recommendation.strategy,
            AdapterStrategy::BrowserStealth
        );
    }

    #[test]
    fn infer_requirements_applies_slo_for_api_target() {
        // 20% blocked ratio is critical for API targets (critical at 15%)
        let report = InvestigationReport {
            target_class: Some(TargetClass::Api),
            ..report_with_counts(25, 5, BTreeMap::from([(403, 2), (429, 3)]))
        };

        let profile = infer_requirements(&report);

        // The adaptive requirement must be present and rated High (critical).
        assert_eq!(adaptive_level(&profile), Some(&RequirementLevel::High));
    }

    #[test]
    fn infer_requirements_with_target_class_respects_slo_thresholds() {
        // 20% blocked ratio assessment differs by target class
        let report = report_with_counts(100, 20, BTreeMap::from([(403, 20)]));

        let expectations = [
            // For API: 20% is critical (threshold 15%)
            (TargetClass::Api, Some(RequirementLevel::High)),
            // For ContentSite: 20% is in warning zone (acceptable 15%, warning 25%)
            (TargetClass::ContentSite, Some(RequirementLevel::Medium)),
            // For HighSecurity: 20% is acceptable (threshold 30%), so no requirement
            (TargetClass::HighSecurity, None),
        ];

        for (target_class, expected) in expectations {
            let profile = infer_requirements_with_target_class(&report, target_class);
            assert_eq!(
                adaptive_level(&profile),
                expected.as_ref(),
                "unexpected adaptive requirement level for {target_class:?}"
            );
        }
    }

    #[test]
    fn infer_requirements_below_slo_has_no_adaptive_requirement() {
        // 5% blocked ratio is acceptable for API targets
        let report = report_with_counts(100, 5, BTreeMap::from([(403, 5)]));

        let profile = infer_requirements_with_target_class(&report, TargetClass::Api);

        // Acceptable for API, so the adaptive requirement must be absent.
        assert!(adaptive_level(&profile).is_none());
    }

    #[cfg(feature = "caching")]
    #[test]
    fn cached_investigation_sets_target_class_and_reuses_cached_report() {
        let capacity = NonZeroUsize::new(8).unwrap_or(NonZeroUsize::MIN);
        let cache = MemoryInvestigationCache::new(capacity, Duration::from_mins(1));
        let har_json = r#"{
            "log": {
                "version": "1.2.0",
                "creator": {"name": "test", "version": "1.0"},
                "pages": [{"id": "page1", "title": "test", "startedDateTime": "2025-01-01T00:00:00Z", "pageTimings": {"onLoad": 0}}],
                "entries": []
            }
        }"#;

        let first_result =
            investigate_har_cached_with_target_class(har_json, TargetClass::Api, &cache);
        assert!(first_result.is_ok(), "cached investigation should succeed");
        let second_result =
            investigate_har_cached_with_target_class(har_json, TargetClass::Api, &cache);
        assert!(
            second_result.is_ok(),
            "cached investigation should hit cache"
        );

        // Both results were asserted `Ok` above; this only unwraps without panicking.
        let (Ok(first), Ok(second)) = (first_result, second_result) else {
            return;
        };

        assert_eq!(first.target_class, Some(TargetClass::Api));
        assert_eq!(second.target_class, Some(TargetClass::Api));
        assert_eq!(first, second);
    }
}