1use serde::{Deserialize, Serialize};
2use thiserror::Error;
3
4use crate::analyzer::AnalyzerProfile;
5use crate::har;
6use crate::infer_requirements_with_target_class;
7use crate::investigation::investigate_har_with_profile;
8use crate::policy::build_runtime_policy;
9use crate::types::{AntiBotProvider, ExecutionMode, SessionMode, TargetClass, TelemetryLevel};
10
11#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
13pub struct ObservatoryCase {
14 pub source_id: String,
16 pub har_json: String,
18}
19
20#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
22#[serde(rename_all = "snake_case")]
23pub enum ObservatoryEscalation {
24 Acceptable,
26 Warning,
28 Critical,
30}
31
32#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34pub struct ObservatorySample {
35 pub source_id: String,
37 pub provider: AntiBotProvider,
39 pub confidence: f64,
41 pub total_requests: u64,
43 pub blocked_requests: u64,
45 pub blocked_ratio: f64,
47 pub escalation: ObservatoryEscalation,
49 pub execution_mode: ExecutionMode,
51 pub session_mode: SessionMode,
53 pub telemetry_level: TelemetryLevel,
55 pub risk_score: f64,
57}
58
59#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
61pub struct ObservatoryComparison {
62 pub source_id: String,
64 pub provider_matches_baseline: bool,
66 pub blocked_ratio_delta: f64,
68 pub confidence_delta: f64,
70 pub risk_score_delta: f64,
72 pub escalation_changed: bool,
74 pub recommended_action: String,
76}
77
78#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
80pub struct ObservatoryReport {
81 pub baseline: ObservatorySample,
83 pub external: Vec<ObservatorySample>,
85 pub comparisons: Vec<ObservatoryComparison>,
87 pub provider_disagreements: usize,
89 pub has_regression: bool,
91}
92
93#[derive(Debug, Error)]
95pub enum ObservatoryError {
96 #[error("observatory run requires at least one external case")]
98 EmptyExternalCases,
99 #[error(transparent)]
101 Har(#[from] har::HarError),
102 #[cfg(feature = "live-validation")]
104 #[error("live observatory capture failed for source '{source_id}': {message}")]
105 LiveCapture {
106 source_id: String,
108 message: String,
110 },
111}
112
113pub fn run_external_observatory_from_hars(
120 baseline_har: &str,
121 external_cases: &[ObservatoryCase],
122 profile: &AnalyzerProfile,
123 target_class: TargetClass,
124) -> Result<ObservatoryReport, ObservatoryError> {
125 if external_cases.is_empty() {
126 return Err(ObservatoryError::EmptyExternalCases);
127 }
128
129 let baseline = evaluate_case("baseline", baseline_har, profile, target_class)?;
130
131 let mut external = Vec::with_capacity(external_cases.len());
132 for case in external_cases {
133 external.push(evaluate_case(
134 &case.source_id,
135 &case.har_json,
136 profile,
137 target_class,
138 )?);
139 }
140
141 let comparisons = external
142 .iter()
143 .map(|sample| compare_sample(&baseline, sample))
144 .collect::<Vec<_>>();
145
146 let provider_disagreements = comparisons
147 .iter()
148 .filter(|comparison| !comparison.provider_matches_baseline)
149 .count();
150
151 let has_regression = comparisons
152 .iter()
153 .any(|comparison| comparison.recommended_action == "investigate_regression");
154
155 Ok(ObservatoryReport {
156 baseline,
157 external,
158 comparisons,
159 provider_disagreements,
160 has_regression,
161 })
162}
163
/// Description of one live HTTP probe sent to the target URL.
///
/// Only available with the `live-validation` feature.
#[cfg(feature = "live-validation")]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LiveObservatoryProbe {
    /// Label used for the resulting case and in capture error messages.
    pub source_id: String,
    /// Value sent as the `User-Agent` of the probe request.
    pub user_agent: String,
    /// Extra request headers added to the probe; an absent field deserializes
    /// to an empty map.
    #[serde(default)]
    pub headers: std::collections::BTreeMap<String, String>,
    /// Request timeout for the probe's HTTP client, in milliseconds.
    pub timeout_ms: u64,
}
178
/// Runs the observatory against live captures instead of pre-recorded HARs.
///
/// Each probe is sent to `target_url` one after another; the response of every
/// probe is turned into a single-entry HAR and the resulting cases are handed
/// to [`run_external_observatory_from_hars`] together with `baseline_har`.
///
/// # Errors
///
/// * [`ObservatoryError::EmptyExternalCases`] when `probes` is empty.
/// * [`ObservatoryError::LiveCapture`] as soon as one probe fails.
/// * Any error returned by [`run_external_observatory_from_hars`].
#[cfg(feature = "live-validation")]
pub async fn run_external_observatory_live(
    baseline_har: &str,
    target_url: &str,
    probes: &[LiveObservatoryProbe],
    profile: &AnalyzerProfile,
    target_class: TargetClass,
) -> Result<ObservatoryReport, ObservatoryError> {
    if probes.is_empty() {
        return Err(ObservatoryError::EmptyExternalCases);
    }

    // Probes are captured sequentially; the first failing capture ends the run.
    let mut captured: Vec<ObservatoryCase> = Vec::with_capacity(probes.len());
    for probe in probes {
        let har_json = capture_probe_har(target_url, probe).await?;
        let source_id = probe.source_id.clone();
        captured.push(ObservatoryCase {
            source_id,
            har_json,
        });
    }

    run_external_observatory_from_hars(baseline_har, &captured, profile, target_class)
}
212
213fn evaluate_case(
214 source_id: &str,
215 har_json: &str,
216 profile: &AnalyzerProfile,
217 target_class: TargetClass,
218) -> Result<ObservatorySample, ObservatoryError> {
219 let report = investigate_har_with_profile(har_json, profile)?;
220 let requirements = infer_requirements_with_target_class(&report, target_class);
221 let policy = build_runtime_policy(&report, &requirements);
222
223 let blocked_ratio = blocked_ratio(report.blocked_requests, report.total_requests);
224
225 Ok(ObservatorySample {
226 source_id: source_id.to_string(),
227 provider: report.aggregate.provider,
228 confidence: report.aggregate.confidence,
229 total_requests: report.total_requests,
230 blocked_requests: report.blocked_requests,
231 blocked_ratio,
232 escalation: escalation_from_requirements(&requirements),
233 execution_mode: policy.execution_mode,
234 session_mode: policy.session_mode,
235 telemetry_level: policy.telemetry_level,
236 risk_score: policy.risk_score,
237 })
238}
239
240fn escalation_from_requirements(
241 requirements: &crate::types::RequirementsProfile,
242) -> ObservatoryEscalation {
243 let adaptive = requirements
244 .requirements
245 .iter()
246 .find(|requirement| requirement.id == "adaptive_rate_and_retry_budget");
247
248 match adaptive.map(|requirement| requirement.level) {
249 Some(crate::types::RequirementLevel::High) => ObservatoryEscalation::Critical,
250 Some(crate::types::RequirementLevel::Medium) => ObservatoryEscalation::Warning,
251 _ => ObservatoryEscalation::Acceptable,
252 }
253}
254
255fn compare_sample(
256 baseline: &ObservatorySample,
257 sample: &ObservatorySample,
258) -> ObservatoryComparison {
259 let provider_matches_baseline = sample.provider == baseline.provider;
260 let blocked_ratio_delta = sample.blocked_ratio - baseline.blocked_ratio;
261 let confidence_delta = sample.confidence - baseline.confidence;
262 let risk_score_delta = sample.risk_score - baseline.risk_score;
263 let escalation_changed = sample.escalation != baseline.escalation;
264
265 let recommended_action = if escalation_changed || blocked_ratio_delta >= 0.05 {
266 "investigate_regression".to_string()
267 } else if !provider_matches_baseline && confidence_delta >= 0.15 {
268 "investigate_provider_drift".to_string()
269 } else if blocked_ratio_delta <= -0.05 && risk_score_delta <= -0.10 {
270 "improved_stability".to_string()
271 } else {
272 "monitor".to_string()
273 };
274
275 ObservatoryComparison {
276 source_id: sample.source_id.clone(),
277 provider_matches_baseline,
278 blocked_ratio_delta,
279 confidence_delta,
280 risk_score_delta,
281 escalation_changed,
282 recommended_action,
283 }
284}
285
/// Sends one live probe to `target_url` and returns the response as a
/// single-entry HAR JSON string.
///
/// The request is a `GET` with a browser-like `accept` header, the probe's
/// user agent and timeout, and any extra headers configured on the probe.
/// Only the response status and headers are captured.
///
/// Response headers are folded into one entry per header name:
///
/// * A header that appears several times (for example multiple `set-cookie`
///   lines) keeps every value, joined with `", "` in the order received,
///   instead of being reduced to the last value.
/// * Values that are not plain visible ASCII are decoded lossily rather than
///   being dropped.
///
/// # Errors
///
/// Returns [`ObservatoryError::LiveCapture`] when the HTTP client cannot be
/// built or the request fails to send.
#[cfg(feature = "live-validation")]
async fn capture_probe_har(
    target_url: &str,
    probe: &LiveObservatoryProbe,
) -> Result<String, ObservatoryError> {
    let live_error = |message: String| ObservatoryError::LiveCapture {
        source_id: probe.source_id.clone(),
        message,
    };

    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_millis(probe.timeout_ms))
        .user_agent(probe.user_agent.as_str())
        .build()
        .map_err(|error| live_error(format!("failed to build client: {error}")))?;

    let mut request = client.get(target_url).header(
        "accept",
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    );

    for (name, value) in &probe.headers {
        request = request.header(name, value);
    }

    let response = request
        .send()
        .await
        .map_err(|error| live_error(error.to_string()))?;

    let status = response.status().as_u16();

    // The header map yields a repeated header once per value, so a plain
    // collect into a map would silently keep only the last one.
    let mut headers = std::collections::BTreeMap::<String, String>::new();
    for (name, value) in response.headers() {
        let text = String::from_utf8_lossy(value.as_bytes());
        headers
            .entry(name.as_str().to_owned())
            .and_modify(|joined| {
                joined.push_str(", ");
                joined.push_str(&text);
            })
            .or_insert_with(|| text.to_string());
    }

    Ok(build_single_request_har(target_url, status, &headers))
}
332
/// Renders a minimal HAR 1.2 document describing one `GET` request to `url`.
///
/// The document contains a single page and a single entry. The response
/// carries the given `status` and `headers`; the response body, the request
/// headers and all timings are left empty or zero, and the timestamps are a
/// fixed placeholder rather than the real capture time.
#[cfg(feature = "live-validation")]
fn build_single_request_har(
    url: &str,
    status: u16,
    headers: &std::collections::BTreeMap<String, String>,
) -> String {
    // Placeholder values shared by the page and its only entry.
    const PAGE_ID: &str = "page_1";
    const PLACEHOLDER_TIMESTAMP: &str = "2026-01-01T00:00:00.000Z";
    const HTTP_VERSION: &str = "HTTP/2";

    let response_headers: Vec<serde_json::Value> = headers
        .iter()
        .map(|(name, value)| serde_json::json!({ "name": name, "value": value }))
        .collect();

    let page = serde_json::json!({
        "id": PAGE_ID,
        "title": url,
        "startedDateTime": PLACEHOLDER_TIMESTAMP,
        "pageTimings": {"onLoad": 0}
    });

    let request = serde_json::json!({
        "method": "GET",
        "url": url,
        "httpVersion": HTTP_VERSION,
        "headers": [],
        "queryString": [],
        "cookies": [],
        "headersSize": -1,
        "bodySize": 0
    });

    let response = serde_json::json!({
        "status": status,
        "statusText": "live-capture",
        "httpVersion": HTTP_VERSION,
        "headers": response_headers,
        "cookies": [],
        "content": {"size": 0, "mimeType": "text/html", "text": ""},
        "redirectURL": "",
        "headersSize": -1,
        "bodySize": 0
    });

    let timings = serde_json::json!({
        "blocked": 0,
        "dns": 0,
        "connect": 0,
        "send": 0,
        "wait": 0,
        "receive": 0,
        "ssl": 0
    });

    let document = serde_json::json!({
        "log": {
            "version": "1.2",
            "creator": {"name": "stygian-charon-observatory", "version": "0.1"},
            "pages": [page],
            "entries": [{
                "pageref": PAGE_ID,
                "startedDateTime": PLACEHOLDER_TIMESTAMP,
                "time": 0,
                "request": request,
                "response": response,
                "cache": {},
                "timings": timings
            }]
        }
    });

    document.to_string()
}
399
/// Converts a request counter to `f64` so it can take part in ratio maths.
///
/// Counts above 2^53 cannot be represented exactly and are rounded to the
/// nearest `f64`; that rounding is accepted here, hence the lint exemption.
#[allow(clippy::cast_precision_loss)]
const fn to_f64(value: u64) -> f64 {
    value as f64
}
404
405const fn blocked_ratio(blocked_requests: u64, total_requests: u64) -> f64 {
406 if total_requests == 0 {
407 0.0
408 } else {
409 to_f64(blocked_requests) / to_f64(total_requests)
410 }
411}
412
#[cfg(test)]
mod tests {
    use super::*;

    /// Single-entry HAR whose only response is a plain `200 OK`.
    const HAR_OK: &str = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[{"startedDateTime":"2026-01-01T00:00:00Z","time":1,"request":{"method":"GET","url":"https://example.com","httpVersion":"HTTP/1.1","headers":[],"queryString":[],"cookies":[],"headersSize":-1,"bodySize":-1},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","headers":[],"cookies":[],"content":{"size":0,"mimeType":"text/html","text":""},"redirectURL":"","headersSize":-1,"bodySize":-1},"cache":{},"timings":{"send":0,"wait":1,"receive":0}}]}}"#;

    /// Single-entry HAR whose only response is a `403` carrying Cloudflare
    /// headers and a Cloudflare challenge title.
    const HAR_BLOCKED: &str = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[{"startedDateTime":"2026-01-01T00:00:00Z","time":1,"request":{"method":"GET","url":"https://example.com","httpVersion":"HTTP/1.1","headers":[],"queryString":[],"cookies":[],"headersSize":-1,"bodySize":-1},"response":{"status":403,"statusText":"Forbidden","httpVersion":"HTTP/1.1","headers":[{"name":"cf-ray","value":"abc"},{"name":"server","value":"cloudflare"}],"cookies":[],"content":{"size":0,"mimeType":"text/html","text":"Attention Required! | Cloudflare"},"redirectURL":"","headersSize":-1,"bodySize":-1},"cache":{},"timings":{"send":0,"wait":1,"receive":0}}]}}"#;

    /// Wraps `har` in a one-element list of external cases labelled `ext-1`.
    fn single_external_case(har: &str) -> Vec<ObservatoryCase> {
        vec![ObservatoryCase {
            source_id: "ext-1".to_string(),
            har_json: har.to_string(),
        }]
    }

    /// Recommended action of the first comparison, if there is one.
    fn first_action(report: &ObservatoryReport) -> Option<&str> {
        report
            .comparisons
            .first()
            .map(|comparison| comparison.recommended_action.as_str())
    }

    #[test]
    fn observatory_runner_rejects_empty_external_cases() {
        let profile = AnalyzerProfile::default();

        let outcome =
            run_external_observatory_from_hars(HAR_OK, &[], &profile, TargetClass::Unknown);

        assert!(matches!(outcome, Err(ObservatoryError::EmptyExternalCases)));
    }

    #[test]
    fn observatory_runner_reports_regression_when_blocked_ratio_jumps() {
        let profile = AnalyzerProfile::default();
        let cases = single_external_case(HAR_BLOCKED);

        let outcome =
            run_external_observatory_from_hars(HAR_OK, &cases, &profile, TargetClass::Api);

        assert!(outcome.is_ok());
        // The assertion above already failed the test if this is an error.
        let Ok(report) = outcome else {
            return;
        };

        assert!(report.has_regression);
        assert_eq!(report.comparisons.len(), 1);

        let action = first_action(&report);
        assert!(action.is_some(), "expected one comparison result");
        assert_eq!(action, Some("investigate_regression"));
    }

    #[test]
    fn observatory_runner_reports_monitor_for_similar_inputs() {
        let profile = AnalyzerProfile::default();
        let cases = single_external_case(HAR_OK);

        let outcome =
            run_external_observatory_from_hars(HAR_OK, &cases, &profile, TargetClass::Unknown);

        assert!(outcome.is_ok());
        // The assertion above already failed the test if this is an error.
        let Ok(report) = outcome else {
            return;
        };

        assert!(!report.has_regression);

        let action = first_action(&report);
        assert!(action.is_some(), "expected one comparison result");
        assert_eq!(action, Some("monitor"));
    }
}