Skip to main content

stygian_charon/
observatory.rs

1use serde::{Deserialize, Serialize};
2use thiserror::Error;
3
4use crate::analyzer::AnalyzerProfile;
5use crate::har;
6use crate::infer_requirements_with_target_class;
7use crate::investigation::investigate_har_with_profile;
8use crate::policy::build_runtime_policy;
9use crate::types::{AntiBotProvider, ExecutionMode, SessionMode, TargetClass, TelemetryLevel};
10
11/// One external observatory HAR input to compare against a baseline HAR.
12#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
13pub struct ObservatoryCase {
14    /// Stable source identifier (for example, an observatory name or region key).
15    pub source_id: String,
16    /// HAR payload captured by the external source.
17    pub har_json: String,
18}
19
20/// Escalation level derived from inferred adaptive requirements.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
22#[serde(rename_all = "snake_case")]
23pub enum ObservatoryEscalation {
24    /// No adaptive escalation requirement detected.
25    Acceptable,
26    /// Warning zone escalation.
27    Warning,
28    /// Critical zone escalation.
29    Critical,
30}
31
32/// Summarized outcome for one baseline/external observatory case.
33#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34pub struct ObservatorySample {
35    /// Source identifier for this sample.
36    pub source_id: String,
37    /// Aggregate provider predicted from the HAR.
38    pub provider: AntiBotProvider,
39    /// Aggregate provider confidence in [0.0, 1.0].
40    pub confidence: f64,
41    /// Total requests in this HAR.
42    pub total_requests: u64,
43    /// Blocked/challenged requests in this HAR.
44    pub blocked_requests: u64,
45    /// Blocked ratio in [0.0, 1.0].
46    pub blocked_ratio: f64,
47    /// Derived escalation level for this source.
48    pub escalation: ObservatoryEscalation,
49    /// Planned execution mode for this source.
50    pub execution_mode: ExecutionMode,
51    /// Planned session mode for this source.
52    pub session_mode: SessionMode,
53    /// Planned telemetry level for this source.
54    pub telemetry_level: TelemetryLevel,
55    /// Planned risk score in [0.0, 1.0].
56    pub risk_score: f64,
57}
58
59/// One baseline-vs-external comparison row.
60#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
61pub struct ObservatoryComparison {
62    /// External source identifier.
63    pub source_id: String,
64    /// Whether provider prediction matches baseline.
65    pub provider_matches_baseline: bool,
66    /// External blocked ratio minus baseline blocked ratio.
67    pub blocked_ratio_delta: f64,
68    /// External confidence minus baseline confidence.
69    pub confidence_delta: f64,
70    /// External risk score minus baseline risk score.
71    pub risk_score_delta: f64,
72    /// Whether escalation level changed compared with baseline.
73    pub escalation_changed: bool,
74    /// Recommended action for this comparison.
75    pub recommended_action: String,
76}
77
78/// Aggregate report for one observatory run.
79#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
80pub struct ObservatoryReport {
81    /// Baseline sample built from the baseline HAR.
82    pub baseline: ObservatorySample,
83    /// One sample per external observatory source.
84    pub external: Vec<ObservatorySample>,
85    /// Baseline-vs-external comparisons.
86    pub comparisons: Vec<ObservatoryComparison>,
87    /// Number of comparisons with provider disagreement.
88    pub provider_disagreements: usize,
89    /// True when any comparison indicates likely regression.
90    pub has_regression: bool,
91}
92
93/// Errors returned by observatory runners.
94#[derive(Debug, Error)]
95pub enum ObservatoryError {
96    /// No external observatory cases were provided.
97    #[error("observatory run requires at least one external case")]
98    EmptyExternalCases,
99    /// HAR parsing/investigation failed.
100    #[error(transparent)]
101    Har(#[from] har::HarError),
102    /// Live HTTP capture failed.
103    #[cfg(feature = "live-validation")]
104    #[error("live observatory capture failed for source '{source_id}': {message}")]
105    LiveCapture {
106        /// Source identifier for the failed capture.
107        source_id: String,
108        /// Human-readable transport error.
109        message: String,
110    },
111}
112
113/// Compare a baseline HAR against external observatory HAR captures.
114///
115/// # Errors
116///
117/// Returns [`ObservatoryError::EmptyExternalCases`] when `external_cases` is empty,
118/// and [`ObservatoryError::Har`] when any HAR payload is invalid.
119pub fn run_external_observatory_from_hars(
120    baseline_har: &str,
121    external_cases: &[ObservatoryCase],
122    profile: &AnalyzerProfile,
123    target_class: TargetClass,
124) -> Result<ObservatoryReport, ObservatoryError> {
125    if external_cases.is_empty() {
126        return Err(ObservatoryError::EmptyExternalCases);
127    }
128
129    let baseline = evaluate_case("baseline", baseline_har, profile, target_class)?;
130
131    let mut external = Vec::with_capacity(external_cases.len());
132    for case in external_cases {
133        external.push(evaluate_case(
134            &case.source_id,
135            &case.har_json,
136            profile,
137            target_class,
138        )?);
139    }
140
141    let comparisons = external
142        .iter()
143        .map(|sample| compare_sample(&baseline, sample))
144        .collect::<Vec<_>>();
145
146    let provider_disagreements = comparisons
147        .iter()
148        .filter(|comparison| !comparison.provider_matches_baseline)
149        .count();
150
151    let has_regression = comparisons
152        .iter()
153        .any(|comparison| comparison.recommended_action == "investigate_regression");
154
155    Ok(ObservatoryReport {
156        baseline,
157        external,
158        comparisons,
159        provider_disagreements,
160        has_regression,
161    })
162}
163
/// Settings for a single live HTTP probe against the observatory target.
#[cfg(feature = "live-validation")]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LiveObservatoryProbe {
    /// Stable identifier reported as the `source_id` of the resulting sample.
    pub source_id: String,
    /// Value of the `User-Agent` header sent by this probe.
    pub user_agent: String,
    /// Extra request headers (name to value); empty when omitted from the serialized form.
    #[serde(default)]
    pub headers: std::collections::BTreeMap<String, String>,
    /// Request timeout, in milliseconds.
    pub timeout_ms: u64,
}
178
/// Run every live probe against `target_url` and compare the captures with the baseline HAR.
///
/// Only available with the `live-validation` feature. Probes are executed one after
/// another with `reqwest`; each response is turned into a synthetic single-entry HAR and
/// the set is then handed to [`run_external_observatory_from_hars`].
///
/// # Errors
///
/// Returns [`ObservatoryError::EmptyExternalCases`] when no probes are supplied,
/// [`ObservatoryError::LiveCapture`] when a probe request fails, and
/// [`ObservatoryError::Har`] when generated HAR payloads cannot be parsed.
#[cfg(feature = "live-validation")]
pub async fn run_external_observatory_live(
    baseline_har: &str,
    target_url: &str,
    probes: &[LiveObservatoryProbe],
    profile: &AnalyzerProfile,
    target_class: TargetClass,
) -> Result<ObservatoryReport, ObservatoryError> {
    if probes.is_empty() {
        return Err(ObservatoryError::EmptyExternalCases);
    }

    let mut captured: Vec<ObservatoryCase> = Vec::with_capacity(probes.len());
    for probe in probes {
        // The first failing capture aborts the whole run.
        let case = ObservatoryCase {
            har_json: capture_probe_har(target_url, probe).await?,
            source_id: probe.source_id.clone(),
        };
        captured.push(case);
    }

    run_external_observatory_from_hars(baseline_har, &captured, profile, target_class)
}
212
213fn evaluate_case(
214    source_id: &str,
215    har_json: &str,
216    profile: &AnalyzerProfile,
217    target_class: TargetClass,
218) -> Result<ObservatorySample, ObservatoryError> {
219    let report = investigate_har_with_profile(har_json, profile)?;
220    let requirements = infer_requirements_with_target_class(&report, target_class);
221    let policy = build_runtime_policy(&report, &requirements);
222
223    let blocked_ratio = blocked_ratio(report.blocked_requests, report.total_requests);
224
225    Ok(ObservatorySample {
226        source_id: source_id.to_string(),
227        provider: report.aggregate.provider,
228        confidence: report.aggregate.confidence,
229        total_requests: report.total_requests,
230        blocked_requests: report.blocked_requests,
231        blocked_ratio,
232        escalation: escalation_from_requirements(&requirements),
233        execution_mode: policy.execution_mode,
234        session_mode: policy.session_mode,
235        telemetry_level: policy.telemetry_level,
236        risk_score: policy.risk_score,
237    })
238}
239
240fn escalation_from_requirements(
241    requirements: &crate::types::RequirementsProfile,
242) -> ObservatoryEscalation {
243    let adaptive = requirements
244        .requirements
245        .iter()
246        .find(|requirement| requirement.id == "adaptive_rate_and_retry_budget");
247
248    match adaptive.map(|requirement| requirement.level) {
249        Some(crate::types::RequirementLevel::High) => ObservatoryEscalation::Critical,
250        Some(crate::types::RequirementLevel::Medium) => ObservatoryEscalation::Warning,
251        _ => ObservatoryEscalation::Acceptable,
252    }
253}
254
255fn compare_sample(
256    baseline: &ObservatorySample,
257    sample: &ObservatorySample,
258) -> ObservatoryComparison {
259    let provider_matches_baseline = sample.provider == baseline.provider;
260    let blocked_ratio_delta = sample.blocked_ratio - baseline.blocked_ratio;
261    let confidence_delta = sample.confidence - baseline.confidence;
262    let risk_score_delta = sample.risk_score - baseline.risk_score;
263    let escalation_changed = sample.escalation != baseline.escalation;
264
265    let recommended_action = if escalation_changed || blocked_ratio_delta >= 0.05 {
266        "investigate_regression".to_string()
267    } else if !provider_matches_baseline && confidence_delta >= 0.15 {
268        "investigate_provider_drift".to_string()
269    } else if blocked_ratio_delta <= -0.05 && risk_score_delta <= -0.10 {
270        "improved_stability".to_string()
271    } else {
272        "monitor".to_string()
273    };
274
275    ObservatoryComparison {
276        source_id: sample.source_id.clone(),
277        provider_matches_baseline,
278        blocked_ratio_delta,
279        confidence_delta,
280        risk_score_delta,
281        escalation_changed,
282        recommended_action,
283    }
284}
285
/// Send one GET request for `probe` and wrap the response status and headers in a
/// synthetic single-entry HAR document.
///
/// The request carries the probe's user agent, a fixed `accept` header, and the probe's
/// extra headers. Only the status code and headers of the response are used. Header
/// values for which `to_str` fails are skipped, and because the headers are gathered in
/// a map keyed by name, a name that occurs several times keeps a single value.
///
/// # Errors
///
/// Returns [`ObservatoryError::LiveCapture`] when the client cannot be built or the
/// request fails.
#[cfg(feature = "live-validation")]
async fn capture_probe_har(
    target_url: &str,
    probe: &LiveObservatoryProbe,
) -> Result<String, ObservatoryError> {
    // Both failure paths report against the same source id.
    let live_capture_error = |message: String| ObservatoryError::LiveCapture {
        source_id: probe.source_id.clone(),
        message,
    };

    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_millis(probe.timeout_ms))
        .user_agent(probe.user_agent.as_str())
        .build()
        .map_err(|error| live_capture_error(format!("failed to build client: {error}")))?;

    let base_request = client.get(target_url).header(
        "accept",
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    );
    let request = probe
        .headers
        .iter()
        .fold(base_request, |pending, (name, value)| {
            pending.header(name, value)
        });

    let response = request
        .send()
        .await
        .map_err(|error| live_capture_error(error.to_string()))?;

    let status = response.status().as_u16();

    let mut headers = std::collections::BTreeMap::new();
    for (name, value) in response.headers().iter() {
        if let Ok(text) = value.to_str() {
            headers.insert(name.as_str().to_string(), text.to_string());
        }
    }

    Ok(build_single_request_har(target_url, status, &headers))
}
332
/// Render a minimal HAR 1.2 document describing one GET request to `url`.
///
/// Only `url`, `status`, and the response `headers` come from the caller. Everything
/// else is a fixed placeholder: timestamps, timings, sizes, an empty request header
/// list, and an empty response body.
#[cfg(feature = "live-validation")]
fn build_single_request_har(
    url: &str,
    status: u16,
    headers: &std::collections::BTreeMap<String, String>,
) -> String {
    // Placeholder timestamp shared by the page and its only entry.
    let started = "2026-01-01T00:00:00.000Z";

    let response_headers: Vec<serde_json::Value> = headers
        .iter()
        .map(|(name, value)| serde_json::json!({ "name": name, "value": value }))
        .collect();

    let page = serde_json::json!({
        "id": "page_1",
        "title": url,
        "startedDateTime": started,
        "pageTimings": {"onLoad": 0}
    });

    let request = serde_json::json!({
        "method": "GET",
        "url": url,
        "httpVersion": "HTTP/2",
        "headers": [],
        "queryString": [],
        "cookies": [],
        "headersSize": -1,
        "bodySize": 0
    });

    let response = serde_json::json!({
        "status": status,
        "statusText": "live-capture",
        "httpVersion": "HTTP/2",
        "headers": response_headers,
        "cookies": [],
        "content": {"size": 0, "mimeType": "text/html", "text": ""},
        "redirectURL": "",
        "headersSize": -1,
        "bodySize": 0
    });

    let timings = serde_json::json!({
        "blocked": 0,
        "dns": 0,
        "connect": 0,
        "send": 0,
        "wait": 0,
        "receive": 0,
        "ssl": 0
    });

    let entry = serde_json::json!({
        "pageref": "page_1",
        "startedDateTime": started,
        "time": 0,
        "request": request,
        "response": response,
        "cache": {},
        "timings": timings
    });

    let document = serde_json::json!({
        "log": {
            "version": "1.2",
            "creator": {"name": "stygian-charon-observatory", "version": "0.1"},
            "pages": [page],
            "entries": [entry]
        }
    });

    document.to_string()
}
399
/// Convert a `u64` count to `f64` so it can be used in ratio arithmetic.
// The cast is lossy only for values above 2^53, which `f64` cannot represent exactly;
// the lint is silenced because this conversion is the function's whole purpose.
#[allow(clippy::cast_precision_loss)]
const fn to_f64(value: u64) -> f64 {
    value as f64
}
404
405const fn blocked_ratio(blocked_requests: u64, total_requests: u64) -> f64 {
406    if total_requests == 0 {
407        0.0
408    } else {
409        to_f64(blocked_requests) / to_f64(total_requests)
410    }
411}
412
#[cfg(test)]
mod tests {
    use super::*;

    /// Single-entry HAR with a plain `200 OK` response and no response headers.
    const HAR_OK: &str = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[{"startedDateTime":"2026-01-01T00:00:00Z","time":1,"request":{"method":"GET","url":"https://example.com","httpVersion":"HTTP/1.1","headers":[],"queryString":[],"cookies":[],"headersSize":-1,"bodySize":-1},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","headers":[],"cookies":[],"content":{"size":0,"mimeType":"text/html","text":""},"redirectURL":"","headersSize":-1,"bodySize":-1},"cache":{},"timings":{"send":0,"wait":1,"receive":0}}]}}"#;

    /// Single-entry HAR with a `403 Forbidden` response carrying Cloudflare markers.
    const HAR_BLOCKED: &str = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[{"startedDateTime":"2026-01-01T00:00:00Z","time":1,"request":{"method":"GET","url":"https://example.com","httpVersion":"HTTP/1.1","headers":[],"queryString":[],"cookies":[],"headersSize":-1,"bodySize":-1},"response":{"status":403,"statusText":"Forbidden","httpVersion":"HTTP/1.1","headers":[{"name":"cf-ray","value":"abc"},{"name":"server","value":"cloudflare"}],"cookies":[],"content":{"size":0,"mimeType":"text/html","text":"Attention Required! | Cloudflare"},"redirectURL":"","headersSize":-1,"bodySize":-1},"cache":{},"timings":{"send":0,"wait":1,"receive":0}}]}}"#;

    fn default_profile() -> AnalyzerProfile {
        AnalyzerProfile::default()
    }

    /// Wrap one HAR payload as the only external case, using the source id `ext-1`.
    fn single_external_case(har_json: &str) -> Vec<ObservatoryCase> {
        vec![ObservatoryCase {
            source_id: "ext-1".to_string(),
            har_json: har_json.to_string(),
        }]
    }

    #[test]
    fn observatory_runner_rejects_empty_external_cases() {
        let outcome = run_external_observatory_from_hars(
            HAR_OK,
            &[],
            &default_profile(),
            TargetClass::Unknown,
        );

        assert!(matches!(outcome, Err(ObservatoryError::EmptyExternalCases)));
    }

    #[test]
    fn observatory_runner_reports_regression_when_blocked_ratio_jumps() {
        let cases = single_external_case(HAR_BLOCKED);

        let outcome = run_external_observatory_from_hars(
            HAR_OK,
            &cases,
            &default_profile(),
            TargetClass::Api,
        );

        assert!(outcome.is_ok());
        // Unreachable after the assertion above; written this way to avoid `unwrap`.
        let Ok(report) = outcome else {
            return;
        };

        assert!(report.has_regression);
        assert_eq!(report.comparisons.len(), 1);

        let first = report.comparisons.first();
        assert!(first.is_some(), "expected one comparison result");
        let Some(first_comparison) = first else {
            return;
        };
        assert_eq!(
            first_comparison.recommended_action,
            "investigate_regression"
        );
    }

    #[test]
    fn observatory_runner_reports_monitor_for_similar_inputs() {
        let cases = single_external_case(HAR_OK);

        let outcome = run_external_observatory_from_hars(
            HAR_OK,
            &cases,
            &default_profile(),
            TargetClass::Unknown,
        );

        assert!(outcome.is_ok());
        // Unreachable after the assertion above; written this way to avoid `unwrap`.
        let Ok(report) = outcome else {
            return;
        };

        assert!(!report.has_regression);

        let first = report.comparisons.first();
        assert!(first.is_some(), "expected one comparison result");
        let Some(first_comparison) = first else {
            return;
        };
        assert_eq!(first_comparison.recommended_action, "monitor");
    }
}