1use std::collections::BTreeMap;
2
3use serde::{Deserialize, Serialize};
4
5use crate::har;
6use crate::investigation::investigate_har;
7use crate::policy::plan_from_report;
8use crate::probe::{ProbePackReport, challenge_probe_pack, run_probe_pack};
9use crate::snapshot;
10use crate::types::{InvestigationBundle, InvestigationReport, RequirementsProfile, RuntimePolicy};
11use crate::vendor_classifier::VendorClassification;
12use crate::vendor_classifier::VendorClassifier;
13
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
25pub enum BundleRedactionPolicy {
26 None,
28 #[default]
31 Standard,
32 Aggressive,
35}
36
37#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
39pub struct BundleMetadata {
40 pub schema_version: String,
42 pub assembled_at: String,
44 pub redaction_policy: BundleRedactionPolicy,
46 #[serde(default)]
48 pub annotations: BTreeMap<String, String>,
49}
50
51#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
82pub struct DiagnosticBundle {
83 pub metadata: BundleMetadata,
85 pub report: InvestigationReport,
87 pub requirements: RequirementsProfile,
89 pub policy: RuntimePolicy,
91 pub probe_report: ProbePackReport,
93 #[serde(default)]
98 pub coherence_violations: Vec<BundleCoherenceViolation>,
99 #[serde(default, skip_serializing_if = "Option::is_none")]
109 pub vendor_classification: Option<VendorClassification>,
110}
111
112#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
114pub struct BundleCoherenceViolation {
115 pub rule_id: String,
117 pub message: String,
119 pub paths: Vec<String>,
121}
122
123#[derive(Debug, thiserror::Error)]
125pub enum BundleError {
126 #[error("HAR parse error: {0}")]
128 Har(#[from] har::HarError),
129}
130
131pub fn build_diagnostic_bundle(
161 har_json: &str,
162 redaction_policy: BundleRedactionPolicy,
163) -> Result<DiagnosticBundle, BundleError> {
164 let classifier = VendorClassifier::with_builtin_defaults();
165 build_diagnostic_bundle_with_vendor_classifier(har_json, redaction_policy, &classifier)
166}
167
168pub fn build_diagnostic_bundle_with_snapshot(
181 har_json: &str,
182 redaction_policy: BundleRedactionPolicy,
183 snap: &snapshot::NormalizedFingerprintSnapshot,
184) -> Result<DiagnosticBundle, BundleError> {
185 let classifier = VendorClassifier::with_builtin_defaults();
186 build_diagnostic_bundle_full(har_json, redaction_policy, &classifier, Some(snap))
187}
188
189pub fn build_diagnostic_bundle_with_vendor_classifier(
202 har_json: &str,
203 redaction_policy: BundleRedactionPolicy,
204 classifier: &VendorClassifier,
205) -> Result<DiagnosticBundle, BundleError> {
206 build_diagnostic_bundle_full(har_json, redaction_policy, classifier, None)
207}
208
209pub fn build_diagnostic_bundle_full(
220 har_json: &str,
221 redaction_policy: BundleRedactionPolicy,
222 classifier: &VendorClassifier,
223 snap: Option<&snapshot::NormalizedFingerprintSnapshot>,
224) -> Result<DiagnosticBundle, BundleError> {
225 let report = investigate_har(har_json)?;
226 let plan = plan_from_report(report);
227
228 let coherence_violations = snap.map_or_else(Vec::new, |s| {
229 let coherence = snapshot::evaluate_snapshot_coherence(s);
230 coherence
231 .violations
232 .into_iter()
233 .map(|v| BundleCoherenceViolation {
234 rule_id: v.rule_id,
235 message: v.message,
236 paths: v.paths,
237 })
238 .collect()
239 });
240
241 let probe_report = run_probe_pack(&challenge_probe_pack());
242 let vendor_classification = classifier
250 .classify_har(har_json)
251 .ok()
252 .filter(|c| c.is_identified() || !c.evidence.is_empty());
253
254 let mut bundle = DiagnosticBundle {
255 metadata: make_metadata(redaction_policy),
256 report: plan.report,
257 requirements: plan.requirements,
258 policy: plan.policy,
259 probe_report,
260 coherence_violations,
261 vendor_classification,
262 };
263
264 apply_redaction(&mut bundle);
265 Ok(bundle)
266}
267
268#[must_use]
273pub fn diagnostic_bundle_from_investigation(
274 bundle: InvestigationBundle,
275 redaction_policy: BundleRedactionPolicy,
276) -> DiagnosticBundle {
277 let probe_report = run_probe_pack(&challenge_probe_pack());
278 let mut result = DiagnosticBundle {
279 metadata: make_metadata(redaction_policy),
280 report: bundle.report,
281 requirements: bundle.requirements,
282 policy: bundle.policy,
283 probe_report,
284 coherence_violations: Vec::new(),
285 vendor_classification: None,
286 };
287 apply_redaction(&mut result);
288 result
289}
290
291fn make_metadata(redaction_policy: BundleRedactionPolicy) -> BundleMetadata {
292 BundleMetadata {
293 schema_version: "1.0".to_string(),
294 assembled_at: chrono_now(),
295 redaction_policy,
296 annotations: BTreeMap::new(),
297 }
298}
299
/// Returns the current wall-clock time as `unix:<seconds since the Unix epoch>`.
///
/// Only the standard library clock is used. If the system clock reports a
/// time before the epoch, the timestamp degrades to `unix:0` instead of
/// failing.
fn chrono_now() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    let secs = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    };
    format!("unix:{secs}")
}
310
311pub fn apply_redaction(bundle: &mut DiagnosticBundle) {
313 match bundle.metadata.redaction_policy {
314 BundleRedactionPolicy::None => {}
315 BundleRedactionPolicy::Standard => redact_standard(bundle),
316 BundleRedactionPolicy::Aggressive => redact_aggressive(bundle),
317 }
318}
319
/// Placeholder text substituted for any value removed by redaction.
const REDACTED: &str = "[REDACTED]";
321
322fn redact_standard(bundle: &mut DiagnosticBundle) {
323 for req in &mut bundle.report.suspicious_requests {
325 redact_url_credentials(&mut req.url);
326 }
327 if let Some(title) = &mut bundle.report.page_title {
329 redact_url_credentials(title);
330 }
331}
332
333fn redact_aggressive(bundle: &mut DiagnosticBundle) {
334 for req in &mut bundle.report.suspicious_requests {
336 redact_url_credentials(&mut req.url);
337 redact_url_query(&mut req.url);
338 }
339 bundle.report.top_markers.clear();
341 bundle.report.marker_histogram.clear();
342 for req in &mut bundle.report.suspicious_requests {
343 req.detection.markers.clear();
344 }
345}
346
347fn redact_url_credentials(url: &mut String) {
348 if let Some(at_pos) = url.find('@')
350 && let Some(scheme_end) = url.find("://")
351 {
352 let after_scheme = scheme_end + 3;
353 if after_scheme < at_pos {
354 let scheme = url[..scheme_end].to_string();
355 let rest = url[at_pos + 1..].to_string();
356 *url = format!("{scheme}://{REDACTED}@{rest}");
357 }
358 }
359}
360
361fn redact_url_query(url: &mut String) {
362 if let Some(q) = url.find('?') {
363 url.truncate(q);
364 url.push('?');
365 url.push_str(REDACTED);
366 }
367}
368
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;

    /// Minimal valid HAR document with no entries.
    const EMPTY_HAR: &str =
        r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[]}}"#;

    /// HAR with a single 403 response whose request URL embeds `user:pass`
    /// credentials.
    const CREDENTIAL_HAR: &str = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[
        {"startedDateTime":"2026-01-01T00:00:00Z","time":100,
        "request":{"method":"GET","url":"https://user:pass@example.com/page","httpVersion":"HTTP/1.1","headers":[],"queryString":[],"cookies":[],"headersSize":-1,"bodySize":-1},
        "response":{"status":403,"statusText":"Forbidden","httpVersion":"HTTP/1.1",
        "headers":[{"name":"cf-ray","value":"abc-LHR"},{"name":"server","value":"cloudflare"}],
        "cookies":[],"content":{"size":0,"mimeType":"text/html"},"redirectURL":"","headersSize":-1,"bodySize":-1},
        "cache":{},"timings":{"send":0,"wait":100,"receive":0}}
    ]}}"#;

    /// Builds a bundle with the default classifier, failing the test if
    /// assembly returns an error.
    fn build(har: &str, policy: BundleRedactionPolicy) -> DiagnosticBundle {
        build_diagnostic_bundle(har, policy).expect("bundle build should succeed")
    }

    #[test]
    fn build_diagnostic_bundle_empty_har() {
        let bundle = build(EMPTY_HAR, BundleRedactionPolicy::Standard);

        assert_eq!(bundle.metadata.schema_version, "1.0");
        assert_eq!(
            bundle.metadata.redaction_policy,
            BundleRedactionPolicy::Standard
        );
        assert!(bundle.probe_report.total > 0);
        assert!(bundle.coherence_violations.is_empty());
    }

    #[test]
    fn redaction_standard_masks_url_credentials() {
        let bundle = build(CREDENTIAL_HAR, BundleRedactionPolicy::Standard);

        for req in &bundle.report.suspicious_requests {
            assert!(
                !req.url.contains("user:pass"),
                "URL credentials should be redacted: {}",
                req.url
            );
        }
    }

    #[test]
    fn redaction_none_preserves_url_credentials() {
        let bundle = build(CREDENTIAL_HAR, BundleRedactionPolicy::None);

        for req in &bundle.report.suspicious_requests {
            assert!(
                req.url.contains("user:pass"),
                "URL credentials should be preserved with None policy: {}",
                req.url
            );
        }
    }

    #[test]
    fn bundle_metadata_schema_version_is_stable() {
        let bundle = build(EMPTY_HAR, BundleRedactionPolicy::None);

        assert_eq!(bundle.metadata.schema_version, "1.0");
    }

    #[test]
    fn redact_url_credentials_removes_userinfo() {
        let mut url = String::from("https://user:pass@example.com/path");
        redact_url_credentials(&mut url);

        assert!(
            !url.contains("user:pass"),
            "URL credentials should be removed: {url}"
        );
        assert!(url.contains(REDACTED));
    }

    #[test]
    fn redact_url_query_removes_query_string() {
        let mut url = String::from("https://example.com/path?token=secret&other=val");
        redact_url_query(&mut url);

        assert!(
            !url.contains("secret"),
            "query string should be removed: {url}"
        );
        assert!(url.contains('?'));
    }
}
479}