Skip to main content

stygian_charon/
bundle.rs

1use std::collections::BTreeMap;
2
3use serde::{Deserialize, Serialize};
4
5use crate::har;
6use crate::investigation::investigate_har;
7use crate::policy::plan_from_report;
8use crate::probe::{ProbePackReport, challenge_probe_pack, run_probe_pack};
9use crate::snapshot;
10use crate::types::{InvestigationBundle, InvestigationReport, RequirementsProfile, RuntimePolicy};
11use crate::vendor_classifier::VendorClassification;
12use crate::vendor_classifier::VendorClassifier;
13
/// Controls how sensitive fields are treated when serialising a [`DiagnosticBundle`].
///
/// The selected policy is stored in [`BundleMetadata::redaction_policy`] and is
/// the value [`apply_redaction`] dispatches on.
///
/// # Example
///
/// ```rust
/// use stygian_charon::bundle::BundleRedactionPolicy;
///
/// let policy = BundleRedactionPolicy::Standard;
/// assert!(!matches!(policy, BundleRedactionPolicy::None));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum BundleRedactionPolicy {
    /// No redaction — full detail retained (development / local use only).
    None,
    /// Mask `scheme://user:pass@host` style URL credentials.
    /// This is the **recommended default** for incident reporting.
    ///
    /// NOTE(review): this level is also described as covering cookies and
    /// auth headers, but the redaction helpers in this module only rewrite
    /// URL credentials — confirm where (or whether) the rest is handled.
    #[default]
    Standard,
    /// Mask URL credentials, replace URL query strings of suspicious requests
    /// with a placeholder, and clear the report's marker collections.
    ///
    /// NOTE(review): this level is also described as redacting all response
    /// headers; no response-header handling is visible in this module —
    /// confirm against the report types.
    Aggressive,
}
36
/// Metadata fields attached to every [`DiagnosticBundle`].
///
/// Records provenance (format version, assembly time) and which redaction
/// policy the bundle was processed with, so consumers can interpret the
/// remaining fields correctly.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BundleMetadata {
    /// Schema version of the bundle format (`"1.0"`).
    pub schema_version: String,
    /// Timestamp at which the bundle was assembled.
    ///
    /// NOTE(review): documented as an RFC 3339 timestamp — confirm against the
    /// string the bundle constructors actually write here.
    pub assembled_at: String,
    /// Redaction policy applied to this bundle.
    pub redaction_policy: BundleRedactionPolicy,
    /// Arbitrary key/value annotations (tooling, environment, etc.).
    ///
    /// Defaults to an empty map when the key is missing from the input.
    #[serde(default)]
    pub annotations: BTreeMap<String, String>,
}
50
/// Full diagnostic bundle for a single investigation.
///
/// The bundle aggregates the investigation report, requirements, policy,
/// built-in probe outcomes, and optional fingerprint coherence results into a
/// single document suitable for incident response tooling.
///
/// Sensitive fields are redacted according to
/// [`BundleMetadata::redaction_policy`]; see [`apply_redaction`].
///
/// # Format
///
/// The bundle serialises to JSON via `serde`. Top-level fields are:
/// - `metadata` — provenance and redaction policy
/// - `report` — aggregated [`InvestigationReport`]
/// - `requirements` — inferred [`RequirementsProfile`]
/// - `policy` — planned [`RuntimePolicy`]
/// - `probe_report` — outcome of the built-in [`challenge_probe_pack`]
/// - `coherence_violations` — list of `{ rule_id, message, paths }` objects; empty when clean
/// - `vendor_classification` — T89 vendor-fingerprinting classification
///   (additive, `#[serde(default, skip_serializing_if = "Option::is_none")]`)
///
/// # Example
///
/// ```rust
/// use stygian_charon::bundle::{build_diagnostic_bundle, BundleRedactionPolicy};
///
/// let empty_har = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[]}}"#;
/// let bundle = build_diagnostic_bundle(empty_har, BundleRedactionPolicy::Standard).unwrap();
/// assert_eq!(bundle.metadata.schema_version, "1.0");
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DiagnosticBundle {
    /// Bundle provenance and redaction policy.
    pub metadata: BundleMetadata,
    /// Aggregated investigation report.
    pub report: InvestigationReport,
    /// Inferred requirements profile.
    pub requirements: RequirementsProfile,
    /// Planned runtime policy.
    pub policy: RuntimePolicy,
    /// Outcome of running the challenge probe pack against the built-in classifier.
    pub probe_report: ProbePackReport,
    /// Coherence violations reported for a fingerprint snapshot.
    ///
    /// Only populated when a [`snapshot::NormalizedFingerprintSnapshot`] is supplied
    /// (for example via [`build_diagnostic_bundle_with_snapshot`]); otherwise the
    /// list is empty. A missing key deserialises to an empty list.
    #[serde(default)]
    pub coherence_violations: Vec<BundleCoherenceViolation>,
    /// T89 vendor fingerprinting classification (additive field).
    ///
    /// Populated when the bundle is built from a HAR that contains
    /// enough information to identify a vendor; absent (and skipped
    /// during serialisation) otherwise. The field uses
    /// `#[serde(default, skip_serializing_if = "Option::is_none")]`
    /// so older JSON payloads (pre-T89) deserialize unchanged and
    /// newer payloads that did not detect a vendor omit the field
    /// rather than emit `"vendor_classification": null`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub vendor_classification: Option<VendorClassification>,
}
111
/// A serialisable coherence violation record carried inside a
/// [`DiagnosticBundle`].
///
/// Each record is a field-for-field copy (`rule_id`, `message`, `paths`) of a
/// violation reported by the snapshot coherence evaluation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BundleCoherenceViolation {
    /// Stable rule identifier (e.g. `"user_agent_header_match"`).
    pub rule_id: String,
    /// Human-readable explanation.
    pub message: String,
    /// JSON-path-like pointers to the offending fields.
    pub paths: Vec<String>,
}
122
/// Error type for diagnostic bundle construction.
///
/// Returned by the `build_diagnostic_bundle*` family of functions.
#[derive(Debug, thiserror::Error)]
pub enum BundleError {
    /// The HAR input could not be parsed.
    ///
    /// Wraps the underlying [`har::HarError`]; the `#[from]` attribute lets
    /// the `?` operator convert it automatically.
    #[error("HAR parse error: {0}")]
    Har(#[from] har::HarError),
}
130
131/// Build a [`DiagnosticBundle`] from a raw HAR payload.
132///
133/// Runs investigation, requirement inference, policy planning, and the built-in
134/// probe pack. Applies the given [`BundleRedactionPolicy`] to sanitise sensitive
135/// fields before returning.
136///
137/// The T89 vendor classification is computed from the HAR using
138/// [`VendorClassifier::with_builtin_defaults`]. When no vendor-specific
139/// signals are detected, the resulting `vendor_classification` field is
140/// still populated with an "unknown" classification (so consumers can
141/// always inspect the field), but the JSON form omits it via
142/// `skip_serializing_if = "Option::is_none"`.
143/// Pass a custom classifier via
144/// [`build_diagnostic_bundle_with_vendor_classifier`] to override the
145/// threshold or supply custom vendor definitions.
146///
147/// # Errors
148///
149/// Returns [`BundleError::Har`] when the HAR payload is invalid.
150///
151/// # Example
152///
153/// ```rust
154/// use stygian_charon::bundle::{build_diagnostic_bundle, BundleRedactionPolicy};
155///
156/// let empty_har = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[]}}"#;
157/// let bundle = build_diagnostic_bundle(empty_har, BundleRedactionPolicy::Standard).unwrap();
158/// assert!(bundle.probe_report.total > 0);
159/// ```
160pub fn build_diagnostic_bundle(
161    har_json: &str,
162    redaction_policy: BundleRedactionPolicy,
163) -> Result<DiagnosticBundle, BundleError> {
164    let classifier = VendorClassifier::with_builtin_defaults();
165    build_diagnostic_bundle_with_vendor_classifier(har_json, redaction_policy, &classifier)
166}
167
168/// Build a [`DiagnosticBundle`] including fingerprint coherence results.
169///
170/// Identical to [`build_diagnostic_bundle`] but also evaluates coherence rules
171/// against the supplied [`snapshot::NormalizedFingerprintSnapshot`].
172/// The vendor classification uses
173/// [`VendorClassifier::with_builtin_defaults`]; pass
174/// [`build_diagnostic_bundle_full`] to supply a custom classifier
175/// together with a snapshot.
176///
177/// # Errors
178///
179/// Returns [`BundleError::Har`] when the HAR payload is invalid.
180pub fn build_diagnostic_bundle_with_snapshot(
181    har_json: &str,
182    redaction_policy: BundleRedactionPolicy,
183    snap: &snapshot::NormalizedFingerprintSnapshot,
184) -> Result<DiagnosticBundle, BundleError> {
185    let classifier = VendorClassifier::with_builtin_defaults();
186    build_diagnostic_bundle_full(har_json, redaction_policy, &classifier, Some(snap))
187}
188
189/// Build a [`DiagnosticBundle`] with a caller-supplied
190/// [`VendorClassifier`].
191///
192/// Use this when the operator wants a custom threshold or
193/// additional [`crate::vendor_classifier::VendorDefinition`] entries
194/// (e.g. Tier 2 vendors).
195/// The classifier is **stateless**, so callers can build it once
196/// and reuse it across many `build_diagnostic_bundle_*` invocations.
197///
198/// # Errors
199///
200/// Returns [`BundleError::Har`] when the HAR payload is invalid.
201pub fn build_diagnostic_bundle_with_vendor_classifier(
202    har_json: &str,
203    redaction_policy: BundleRedactionPolicy,
204    classifier: &VendorClassifier,
205) -> Result<DiagnosticBundle, BundleError> {
206    build_diagnostic_bundle_full(har_json, redaction_policy, classifier, None)
207}
208
209/// Build a [`DiagnosticBundle`] with a caller-supplied
210/// [`VendorClassifier`] **and** an optional
211/// [`snapshot::NormalizedFingerprintSnapshot`].
212///
213/// This is the most general bundle constructor; every other builder
214/// delegates here.
215///
216/// # Errors
217///
218/// Returns [`BundleError::Har`] when the HAR payload is invalid.
219pub fn build_diagnostic_bundle_full(
220    har_json: &str,
221    redaction_policy: BundleRedactionPolicy,
222    classifier: &VendorClassifier,
223    snap: Option<&snapshot::NormalizedFingerprintSnapshot>,
224) -> Result<DiagnosticBundle, BundleError> {
225    let report = investigate_har(har_json)?;
226    let plan = plan_from_report(report);
227
228    let coherence_violations = snap.map_or_else(Vec::new, |s| {
229        let coherence = snapshot::evaluate_snapshot_coherence(s);
230        coherence
231            .violations
232            .into_iter()
233            .map(|v| BundleCoherenceViolation {
234                rule_id: v.rule_id,
235                message: v.message,
236                paths: v.paths,
237            })
238            .collect()
239    });
240
241    let probe_report = run_probe_pack(&challenge_probe_pack());
242    // The vendor classification is best-effort: if the HAR is
243    // unparseable for the classifier's purposes (it consumes the
244    // same HAR shape as `investigate_har`), we keep the
245    // `vendor_classification` field at `None` so the JSON form
246    // omits it via `skip_serializing_if`. We also drop the field
247    // when the classifier reports "unknown" with no evidence, so
248    // empty HARs do not produce noise in the diagnostic payload.
249    let vendor_classification = classifier
250        .classify_har(har_json)
251        .ok()
252        .filter(|c| c.is_identified() || !c.evidence.is_empty());
253
254    let mut bundle = DiagnosticBundle {
255        metadata: make_metadata(redaction_policy),
256        report: plan.report,
257        requirements: plan.requirements,
258        policy: plan.policy,
259        probe_report,
260        coherence_violations,
261        vendor_classification,
262    };
263
264    apply_redaction(&mut bundle);
265    Ok(bundle)
266}
267
268/// Convert an [`InvestigationBundle`] into a [`DiagnosticBundle`] (no HAR needed).
269///
270/// Useful when the caller already has an `InvestigationBundle` and only needs to
271/// enrich it with probe outcomes and metadata.
272#[must_use]
273pub fn diagnostic_bundle_from_investigation(
274    bundle: InvestigationBundle,
275    redaction_policy: BundleRedactionPolicy,
276) -> DiagnosticBundle {
277    let probe_report = run_probe_pack(&challenge_probe_pack());
278    let mut result = DiagnosticBundle {
279        metadata: make_metadata(redaction_policy),
280        report: bundle.report,
281        requirements: bundle.requirements,
282        policy: bundle.policy,
283        probe_report,
284        coherence_violations: Vec::new(),
285        vendor_classification: None,
286    };
287    apply_redaction(&mut result);
288    result
289}
290
291fn make_metadata(redaction_policy: BundleRedactionPolicy) -> BundleMetadata {
292    BundleMetadata {
293        schema_version: "1.0".to_string(),
294        assembled_at: chrono_now(),
295        redaction_policy,
296        annotations: BTreeMap::new(),
297    }
298}
299
/// Produce the current UTC time as an RFC 3339 timestamp
/// (`YYYY-MM-DDTHH:MM:SSZ`) without pulling in `chrono`.
///
/// The value is derived from [`std::time::SystemTime`] with whole-second
/// precision. If the system clock reports a time before the UNIX epoch the
/// function falls back to the epoch itself (`1970-01-01T00:00:00Z`) rather
/// than failing, since the timestamp is informational only.
fn chrono_now() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs());
    format_rfc3339_utc(secs)
}

/// Format whole seconds since the UNIX epoch as an RFC 3339 UTC timestamp.
fn format_rfc3339_utc(epoch_secs: u64) -> String {
    const SECS_PER_DAY: u64 = 86_400;

    let days = epoch_secs / SECS_PER_DAY;
    let secs_of_day = epoch_secs % SECS_PER_DAY;
    let (hour, minute, second) = (secs_of_day / 3_600, (secs_of_day % 3_600) / 60, secs_of_day % 60);

    // Civil-from-days conversion for the proleptic Gregorian calendar.
    // The day count is shifted so that each 400-year era starts on 1 March,
    // which places the leap day at the end of the (shifted) year and makes
    // the month lengths follow a simple linear formula.
    let shifted = days + 719_468;
    let era = shifted / 146_097;
    let day_of_era = shifted % 146_097;
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let shifted_month = (5 * day_of_year + 2) / 153; // 0 = March … 11 = February
    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1;
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    };
    // January and February belong to the following civil year.
    let year = year_of_era + era * 400 + u64::from(month <= 2);

    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
}
310
311/// Apply the redaction policy to a bundle in place.
312pub fn apply_redaction(bundle: &mut DiagnosticBundle) {
313    match bundle.metadata.redaction_policy {
314        BundleRedactionPolicy::None => {}
315        BundleRedactionPolicy::Standard => redact_standard(bundle),
316        BundleRedactionPolicy::Aggressive => redact_aggressive(bundle),
317    }
318}
319
/// Placeholder text substituted for any value removed by the redaction helpers.
const REDACTED: &str = "[REDACTED]";
321
322fn redact_standard(bundle: &mut DiagnosticBundle) {
323    // Redact URL credentials in request summaries.
324    for req in &mut bundle.report.suspicious_requests {
325        redact_url_credentials(&mut req.url);
326    }
327    // Redact page title if it looks like it contains credentials.
328    if let Some(title) = &mut bundle.report.page_title {
329        redact_url_credentials(title);
330    }
331}
332
333fn redact_aggressive(bundle: &mut DiagnosticBundle) {
334    // Redact all URL query strings from suspicious requests.
335    for req in &mut bundle.report.suspicious_requests {
336        redact_url_credentials(&mut req.url);
337        redact_url_query(&mut req.url);
338    }
339    // Clear all markers (may contain partial cookie/token values).
340    bundle.report.top_markers.clear();
341    bundle.report.marker_histogram.clear();
342    for req in &mut bundle.report.suspicious_requests {
343        req.detection.markers.clear();
344    }
345}
346
347fn redact_url_credentials(url: &mut String) {
348    // Replace `://user:pass@` style credentials.
349    if let Some(at_pos) = url.find('@')
350        && let Some(scheme_end) = url.find("://")
351    {
352        let after_scheme = scheme_end + 3;
353        if after_scheme < at_pos {
354            let scheme = url[..scheme_end].to_string();
355            let rest = url[at_pos + 1..].to_string();
356            *url = format!("{scheme}://{REDACTED}@{rest}");
357        }
358    }
359}
360
361fn redact_url_query(url: &mut String) {
362    if let Some(q) = url.find('?') {
363        url.truncate(q);
364        url.push('?');
365        url.push_str(REDACTED);
366    }
367}
368
// Unit tests for bundle construction and the redaction helpers.
//
// The lints below are relaxed for test code only.
// NOTE(review): presumably these lints are denied crate-wide — confirm
// against the crate's lint configuration.
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;

    /// Minimal valid HAR document with no entries.
    const EMPTY_HAR: &str =
        r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[]}}"#;

    /// An empty HAR still yields a complete bundle: metadata is stamped, the
    /// probe pack has run, and no coherence violations are reported.
    #[test]
    fn build_diagnostic_bundle_empty_har() {
        let result = build_diagnostic_bundle(EMPTY_HAR, BundleRedactionPolicy::Standard);
        assert!(result.is_ok(), "bundle build should succeed");
        let Ok(bundle) = result else {
            return;
        };
        assert_eq!(bundle.metadata.schema_version, "1.0");
        assert_eq!(
            bundle.metadata.redaction_policy,
            BundleRedactionPolicy::Standard
        );
        assert!(bundle.probe_report.total > 0);
        assert!(bundle.coherence_violations.is_empty());
    }

    /// With the `Standard` policy, no suspicious request URL may still carry
    /// the `user:pass` credentials from the input HAR.
    ///
    /// NOTE(review): the assertion sits inside a loop, so the test passes
    /// vacuously if the investigation flags no request as suspicious.
    #[test]
    fn redaction_standard_masks_url_credentials() {
        let har = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[
            {"startedDateTime":"2026-01-01T00:00:00Z","time":100,
             "request":{"method":"GET","url":"https://user:pass@example.com/page","httpVersion":"HTTP/1.1","headers":[],"queryString":[],"cookies":[],"headersSize":-1,"bodySize":-1},
             "response":{"status":403,"statusText":"Forbidden","httpVersion":"HTTP/1.1",
               "headers":[{"name":"cf-ray","value":"abc-LHR"},{"name":"server","value":"cloudflare"}],
               "cookies":[],"content":{"size":0,"mimeType":"text/html"},"redirectURL":"","headersSize":-1,"bodySize":-1},
             "cache":{},"timings":{"send":0,"wait":100,"receive":0}}
        ]}}"#;

        let result = build_diagnostic_bundle(har, BundleRedactionPolicy::Standard);
        assert!(result.is_ok(), "bundle build should succeed");
        let Ok(bundle) = result else {
            return;
        };
        for req in &bundle.report.suspicious_requests {
            assert!(
                !req.url.contains("user:pass"),
                "URL credentials should be redacted: {}",
                req.url
            );
        }
    }

    /// With the `None` policy, suspicious request URLs keep their credentials.
    ///
    /// NOTE(review): as above, this passes vacuously when the list of
    /// suspicious requests is empty.
    #[test]
    fn redaction_none_preserves_url_credentials() {
        let har = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[
            {"startedDateTime":"2026-01-01T00:00:00Z","time":100,
             "request":{"method":"GET","url":"https://user:pass@example.com/page","httpVersion":"HTTP/1.1","headers":[],"queryString":[],"cookies":[],"headersSize":-1,"bodySize":-1},
             "response":{"status":403,"statusText":"Forbidden","httpVersion":"HTTP/1.1",
               "headers":[{"name":"cf-ray","value":"abc-LHR"},{"name":"server","value":"cloudflare"}],
               "cookies":[],"content":{"size":0,"mimeType":"text/html"},"redirectURL":"","headersSize":-1,"bodySize":-1},
             "cache":{},"timings":{"send":0,"wait":100,"receive":0}}
        ]}}"#;

        let result = build_diagnostic_bundle(har, BundleRedactionPolicy::None);
        assert!(result.is_ok(), "bundle build should succeed");
        let Ok(bundle) = result else {
            return;
        };
        for req in &bundle.report.suspicious_requests {
            assert!(
                req.url.contains("user:pass"),
                "URL credentials should be preserved with None policy: {}",
                req.url
            );
        }
    }

    /// Guards the serialised schema version against accidental changes.
    #[test]
    fn bundle_metadata_schema_version_is_stable() {
        let result = build_diagnostic_bundle(EMPTY_HAR, BundleRedactionPolicy::None);
        assert!(result.is_ok(), "bundle build should succeed");
        let Ok(bundle) = result else {
            return;
        };
        assert_eq!(bundle.metadata.schema_version, "1.0");
    }

    /// The userinfo part of a URL is replaced by the redaction placeholder.
    #[test]
    fn redact_url_credentials_removes_userinfo() {
        let mut url = "https://user:pass@example.com/path".to_string();
        redact_url_credentials(&mut url);
        assert!(
            !url.contains("user:pass"),
            "URL credentials should be removed: {url}"
        );
        assert!(url.contains(REDACTED));
    }

    /// The query string is removed while the `?` delimiter is kept.
    #[test]
    fn redact_url_query_removes_query_string() {
        let mut url = "https://example.com/path?token=secret&other=val".to_string();
        redact_url_query(&mut url);
        assert!(
            !url.contains("secret"),
            "query string should be removed: {url}"
        );
        assert!(url.contains('?'));
    }
}
479}