Skip to main content

stygian_charon/
bundle.rs

1use std::collections::BTreeMap;
2
3use serde::{Deserialize, Serialize};
4
5use crate::har;
6use crate::investigation::investigate_har;
7use crate::policy::plan_from_report;
8use crate::probe::{ProbePackReport, challenge_probe_pack, run_probe_pack};
9use crate::snapshot;
10use crate::types::{InvestigationBundle, InvestigationReport, RequirementsProfile, RuntimePolicy};
11
12/// Controls how sensitive fields are treated when serialising a [`DiagnosticBundle`].
13///
14/// # Example
15///
16/// ```rust
17/// use stygian_charon::bundle::BundleRedactionPolicy;
18///
19/// let policy = BundleRedactionPolicy::Standard;
20/// assert!(!matches!(policy, BundleRedactionPolicy::None));
21/// ```
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
23pub enum BundleRedactionPolicy {
24    /// No redaction — full detail retained (development / local use only).
25    None,
26    /// Redact cookies, auth headers, and URL credentials.
27    /// This is the **recommended default** for incident reporting.
28    #[default]
29    Standard,
30    /// Redact all response headers and URL query parameters in addition to
31    /// everything covered by `Standard`.
32    Aggressive,
33}
34
35/// Metadata fields attached to every [`DiagnosticBundle`].
36#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
37pub struct BundleMetadata {
38    /// Schema version of the bundle format (`"1.0"`).
39    pub schema_version: String,
40    /// RFC 3339 timestamp at which the bundle was assembled.
41    pub assembled_at: String,
42    /// Redaction policy applied to this bundle.
43    pub redaction_policy: BundleRedactionPolicy,
44    /// Arbitrary key/value annotations (tooling, environment, etc.).
45    #[serde(default)]
46    pub annotations: BTreeMap<String, String>,
47}
48
49/// Full diagnostic bundle for a single investigation.
50///
51/// The bundle aggregates the investigation report, requirements, policy,
52/// built-in probe outcomes, and optional fingerprint coherence results into a
53/// single document suitable for incident response tooling.
54///
55/// Sensitive fields (cookies, auth headers) are redacted according to
56/// [`BundleMetadata::redaction_policy`].
57///
58/// # Format
59///
60/// The bundle serialises to JSON via `serde`. Top-level fields are:
61/// - `metadata` — provenance and redaction policy
62/// - `report` — aggregated [`InvestigationReport`]
63/// - `requirements` — inferred [`RequirementsProfile`]
64/// - `policy` — planned [`RuntimePolicy`]
65/// - `probe_report` — outcome of the built-in [`challenge_probe_pack`]
66/// - `coherence_violations` — list of `{ rule_id, message, paths }` objects; empty when clean
67///
68/// # Example
69///
70/// ```rust
71/// use stygian_charon::bundle::{build_diagnostic_bundle, BundleRedactionPolicy};
72///
73/// let empty_har = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[]}}"#;
74/// let bundle = build_diagnostic_bundle(empty_har, BundleRedactionPolicy::Standard).unwrap();
75/// assert_eq!(bundle.metadata.schema_version, "1.0");
76/// ```
77#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
78pub struct DiagnosticBundle {
79    /// Bundle provenance and redaction policy.
80    pub metadata: BundleMetadata,
81    /// Aggregated investigation report.
82    pub report: InvestigationReport,
83    /// Inferred requirements profile.
84    pub requirements: RequirementsProfile,
85    /// Planned runtime policy.
86    pub policy: RuntimePolicy,
87    /// Outcome of running the challenge probe pack against the built-in classifier.
88    pub probe_report: ProbePackReport,
89    /// Coherence violations across response headers in the investigation.
90    ///
91    /// Only populated when a [`snapshot::NormalizedFingerprintSnapshot`] is supplied
92    /// via [`build_diagnostic_bundle_with_snapshot`].
93    #[serde(default)]
94    pub coherence_violations: Vec<BundleCoherenceViolation>,
95}
96
97/// A redacted coherence violation record.
98#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
99pub struct BundleCoherenceViolation {
100    /// Stable rule identifier (e.g. `"user_agent_header_match"`).
101    pub rule_id: String,
102    /// Human-readable explanation.
103    pub message: String,
104    /// JSON-path-like pointers to the offending fields.
105    pub paths: Vec<String>,
106}
107
108/// Error type for diagnostic bundle construction.
109#[derive(Debug, thiserror::Error)]
110pub enum BundleError {
111    /// The HAR input could not be parsed.
112    #[error("HAR parse error: {0}")]
113    Har(#[from] har::HarError),
114}
115
116/// Build a [`DiagnosticBundle`] from a raw HAR payload.
117///
118/// Runs investigation, requirement inference, policy planning, and the built-in
119/// probe pack. Applies the given [`BundleRedactionPolicy`] to sanitise sensitive
120/// fields before returning.
121///
122/// # Errors
123///
124/// Returns [`BundleError::Har`] when the HAR payload is invalid.
125///
126/// # Example
127///
128/// ```rust
129/// use stygian_charon::bundle::{build_diagnostic_bundle, BundleRedactionPolicy};
130///
131/// let empty_har = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[]}}"#;
132/// let bundle = build_diagnostic_bundle(empty_har, BundleRedactionPolicy::Standard).unwrap();
133/// assert!(bundle.probe_report.total > 0);
134/// ```
135pub fn build_diagnostic_bundle(
136    har_json: &str,
137    redaction_policy: BundleRedactionPolicy,
138) -> Result<DiagnosticBundle, BundleError> {
139    build_diagnostic_bundle_inner(har_json, redaction_policy, None)
140}
141
142/// Build a [`DiagnosticBundle`] including fingerprint coherence results.
143///
144/// Identical to [`build_diagnostic_bundle`] but also evaluates coherence rules
145/// against the supplied [`snapshot::NormalizedFingerprintSnapshot`].
146///
147/// # Errors
148///
149/// Returns [`BundleError::Har`] when the HAR payload is invalid.
150pub fn build_diagnostic_bundle_with_snapshot(
151    har_json: &str,
152    redaction_policy: BundleRedactionPolicy,
153    snap: &snapshot::NormalizedFingerprintSnapshot,
154) -> Result<DiagnosticBundle, BundleError> {
155    build_diagnostic_bundle_inner(har_json, redaction_policy, Some(snap))
156}
157
158/// Convert an [`InvestigationBundle`] into a [`DiagnosticBundle`] (no HAR needed).
159///
160/// Useful when the caller already has an `InvestigationBundle` and only needs to
161/// enrich it with probe outcomes and metadata.
162#[must_use]
163pub fn diagnostic_bundle_from_investigation(
164    bundle: InvestigationBundle,
165    redaction_policy: BundleRedactionPolicy,
166) -> DiagnosticBundle {
167    let probe_report = run_probe_pack(&challenge_probe_pack());
168    let mut result = DiagnosticBundle {
169        metadata: make_metadata(redaction_policy),
170        report: bundle.report,
171        requirements: bundle.requirements,
172        policy: bundle.policy,
173        probe_report,
174        coherence_violations: Vec::new(),
175    };
176    apply_redaction(&mut result);
177    result
178}
179
180fn build_diagnostic_bundle_inner(
181    har_json: &str,
182    redaction_policy: BundleRedactionPolicy,
183    snap: Option<&snapshot::NormalizedFingerprintSnapshot>,
184) -> Result<DiagnosticBundle, BundleError> {
185    let report = investigate_har(har_json)?;
186    let plan = plan_from_report(report);
187
188    let coherence_violations = snap.map_or_else(Vec::new, |s| {
189        let coherence = snapshot::evaluate_snapshot_coherence(s);
190        coherence
191            .violations
192            .into_iter()
193            .map(|v| BundleCoherenceViolation {
194                rule_id: v.rule_id,
195                message: v.message,
196                paths: v.paths,
197            })
198            .collect()
199    });
200
201    let probe_report = run_probe_pack(&challenge_probe_pack());
202
203    let mut bundle = DiagnosticBundle {
204        metadata: make_metadata(redaction_policy),
205        report: plan.report,
206        requirements: plan.requirements,
207        policy: plan.policy,
208        probe_report,
209        coherence_violations,
210    };
211
212    apply_redaction(&mut bundle);
213    Ok(bundle)
214}
215
216fn make_metadata(redaction_policy: BundleRedactionPolicy) -> BundleMetadata {
217    BundleMetadata {
218        schema_version: "1.0".to_string(),
219        assembled_at: chrono_now(),
220        redaction_policy,
221        annotations: BTreeMap::new(),
222    }
223}
224
/// Produce the current UTC time as an RFC 3339 timestamp without pulling in `chrono`.
///
/// The result has second precision and the fixed shape `YYYY-MM-DDTHH:MM:SSZ`,
/// which is what [`BundleMetadata::assembled_at`] documents. If the system
/// clock reports a time before the UNIX epoch, the epoch itself
/// (`1970-01-01T00:00:00Z`) is returned instead of failing.
fn chrono_now() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    let secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs());
    rfc3339_utc_from_unix_secs(secs)
}

/// Format whole seconds since the UNIX epoch as `YYYY-MM-DDTHH:MM:SSZ` (UTC).
fn rfc3339_utc_from_unix_secs(secs: u64) -> String {
    const SECS_PER_DAY: u64 = 86_400;

    let days = secs / SECS_PER_DAY;
    let second_of_day = secs % SECS_PER_DAY;
    let hour = second_of_day / 3_600;
    let minute = second_of_day % 3_600 / 60;
    let second = second_of_day % 60;

    // Days-to-civil-date conversion on the proleptic Gregorian calendar.
    // Shifting the day count so that it starts on 0000-03-01 puts the leap day
    // at the very end of each (March-based) year, which keeps the arithmetic
    // branch-free. 146_097 is the number of days in a 400-year era.
    let shifted = days + 719_468;
    let era = shifted / 146_097;
    let day_of_era = shifted % 146_097;
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March … 11 = February).
    let march_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_month + 2) / 5 + 1;
    let month = if march_month < 10 {
        march_month + 3
    } else {
        march_month - 9
    };
    // January and February belong to the following civil year.
    let year = year_of_era + era * 400 + u64::from(month <= 2);

    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
}
235
236/// Apply the redaction policy to a bundle in place.
237pub fn apply_redaction(bundle: &mut DiagnosticBundle) {
238    match bundle.metadata.redaction_policy {
239        BundleRedactionPolicy::None => {}
240        BundleRedactionPolicy::Standard => redact_standard(bundle),
241        BundleRedactionPolicy::Aggressive => redact_aggressive(bundle),
242    }
243}
244
245const REDACTED: &str = "[REDACTED]";
246
247fn redact_standard(bundle: &mut DiagnosticBundle) {
248    // Redact URL credentials in request summaries.
249    for req in &mut bundle.report.suspicious_requests {
250        redact_url_credentials(&mut req.url);
251    }
252    // Redact page title if it looks like it contains credentials.
253    if let Some(title) = &mut bundle.report.page_title {
254        redact_url_credentials(title);
255    }
256}
257
258fn redact_aggressive(bundle: &mut DiagnosticBundle) {
259    // Redact all URL query strings from suspicious requests.
260    for req in &mut bundle.report.suspicious_requests {
261        redact_url_credentials(&mut req.url);
262        redact_url_query(&mut req.url);
263    }
264    // Clear all markers (may contain partial cookie/token values).
265    bundle.report.top_markers.clear();
266    bundle.report.marker_histogram.clear();
267    for req in &mut bundle.report.suspicious_requests {
268        req.detection.markers.clear();
269    }
270}
271
272fn redact_url_credentials(url: &mut String) {
273    // Replace `://user:pass@` style credentials.
274    if let Some(at_pos) = url.find('@')
275        && let Some(scheme_end) = url.find("://")
276    {
277        let after_scheme = scheme_end + 3;
278        if after_scheme < at_pos {
279            let scheme = url[..scheme_end].to_string();
280            let rest = url[at_pos + 1..].to_string();
281            *url = format!("{scheme}://{REDACTED}@{rest}");
282        }
283    }
284}
285
286fn redact_url_query(url: &mut String) {
287    if let Some(q) = url.find('?') {
288        url.truncate(q);
289        url.push('?');
290        url.push_str(REDACTED);
291    }
292}
293
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal valid HAR document without any entries.
    const EMPTY_HAR: &str =
        r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[]}}"#;

    /// HAR with a single Cloudflare-style 403 response whose request URL
    /// embeds `user:pass` credentials. Shared by the redaction policy tests.
    const CREDENTIALED_HAR: &str = r#"{"log":{"version":"1.2","creator":{"name":"test","version":"0"},"entries":[
            {"startedDateTime":"2026-01-01T00:00:00Z","time":100,
             "request":{"method":"GET","url":"https://user:pass@example.com/page","httpVersion":"HTTP/1.1","headers":[],"queryString":[],"cookies":[],"headersSize":-1,"bodySize":-1},
             "response":{"status":403,"statusText":"Forbidden","httpVersion":"HTTP/1.1",
               "headers":[{"name":"cf-ray","value":"abc-LHR"},{"name":"server","value":"cloudflare"}],
               "cookies":[],"content":{"size":0,"mimeType":"text/html"},"redirectURL":"","headersSize":-1,"bodySize":-1},
             "cache":{},"timings":{"send":0,"wait":100,"receive":0}}
        ]}}"#;

    /// Build a bundle and fail the test (showing the error) if that does not work.
    fn build_ok(har: &str, policy: BundleRedactionPolicy) -> Option<DiagnosticBundle> {
        let result = build_diagnostic_bundle(har, policy);
        assert!(
            result.is_ok(),
            "bundle build should succeed: {:?}",
            result.as_ref().err()
        );
        result.ok()
    }

    #[test]
    fn build_diagnostic_bundle_empty_har() {
        let Some(bundle) = build_ok(EMPTY_HAR, BundleRedactionPolicy::Standard) else {
            return;
        };
        assert_eq!(bundle.metadata.schema_version, "1.0");
        assert_eq!(
            bundle.metadata.redaction_policy,
            BundleRedactionPolicy::Standard
        );
        assert!(bundle.probe_report.total > 0);
        assert!(bundle.coherence_violations.is_empty());
    }

    #[test]
    fn redaction_standard_masks_url_credentials() {
        let Some(bundle) = build_ok(CREDENTIALED_HAR, BundleRedactionPolicy::Standard) else {
            return;
        };
        for req in &bundle.report.suspicious_requests {
            assert!(
                !req.url.contains("user:pass"),
                "URL credentials should be redacted: {}",
                req.url
            );
        }
    }

    #[test]
    fn redaction_none_preserves_url_credentials() {
        let Some(bundle) = build_ok(CREDENTIALED_HAR, BundleRedactionPolicy::None) else {
            return;
        };
        for req in &bundle.report.suspicious_requests {
            assert!(
                req.url.contains("user:pass"),
                "URL credentials should be preserved with None policy: {}",
                req.url
            );
        }
    }

    #[test]
    fn bundle_metadata_schema_version_is_stable() {
        let Some(bundle) = build_ok(EMPTY_HAR, BundleRedactionPolicy::None) else {
            return;
        };
        assert_eq!(bundle.metadata.schema_version, "1.0");
    }

    #[test]
    fn redact_url_credentials_removes_userinfo() {
        let mut url = "https://user:pass@example.com/path".to_string();
        redact_url_credentials(&mut url);
        // Pin the exact output so that a mangled host or path cannot slip through.
        assert_eq!(url, "https://[REDACTED]@example.com/path");
    }

    #[test]
    fn redact_url_query_removes_query_string() {
        let mut url = "https://example.com/path?token=secret&other=val".to_string();
        redact_url_query(&mut url);
        // The `?` stays; everything after it is replaced by the placeholder.
        assert_eq!(url, "https://example.com/path?[REDACTED]");
    }
}