Skip to main content

stygian_charon/
snapshot.rs

1use std::collections::{BTreeMap, BTreeSet};
2
3use serde::{Deserialize, Serialize};
4
5// ── Input Size Limits ───────────────────────────────────────────────────────
6// Bounds on snapshot field sizes to prevent resource exhaustion and DoS.
7
/// Maximum JSON payload size for a snapshot (10 MiB).
///
/// Checked against the serialized output produced by
/// `collect_deterministic_snapshot_bytes`.
const MAX_SNAPSHOT_JSON_BYTES: usize = 10 * 1024 * 1024;

/// Maximum length in bytes for free-form string fields.
///
/// Applied to `schema_version`, `captured_at`, the `signals` text fields
/// (`user_agent`, `accept_language`, `platform`, `timezone`), header keys and
/// values, WebGL vendor/renderer, metadata values and `legacy_user_agent`.
const MAX_STRING_FIELD_BYTES: usize = 4_096;

/// Maximum length in bytes for identifier-like fields.
///
/// Applied to `snapshot_id`, `signals.tls.ja3_hash`, `signals.tls.ja4`,
/// metadata keys and `legacy_ja3_hash`.
const MAX_HASH_FIELD_BYTES: usize = 1_024;

/// Maximum number of header entries in signals.headers.
const MAX_HEADERS_ENTRIES: usize = 256;

/// Maximum number of feature flags in signals.features.
const MAX_FEATURES_ENTRIES: usize = 256;

/// Maximum number of metadata entries.
const MAX_METADATA_ENTRIES: usize = 128;
25
/// Normalized capture mode for a fingerprint snapshot.
///
/// The mode decides which optional signal surfaces
/// `validate_snapshot_compatibility` treats as mandatory.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SnapshotMode {
    /// Snapshot captured from HTTP-oriented execution (`signals.tls` is required).
    Http,
    /// Snapshot captured from browser-oriented execution (`signals.webgl` is required).
    Browser,
    /// Snapshot combines HTTP and browser surfaces (no surface is mandatory).
    Hybrid,
}
36
/// Screen-related fingerprint surface.
///
/// Only `PartialEq` is derived (not `Eq`) because `device_pixel_ratio` is a
/// floating-point value.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ScreenFingerprint {
    /// Screen width in CSS pixels.
    pub width: u32,
    /// Screen height in CSS pixels.
    pub height: u32,
    /// Device pixel ratio.
    pub device_pixel_ratio: f64,
}
47
/// WebGL-related fingerprint surface.
///
/// When this surface is present, the `webgl_fields_populated` coherence rule
/// expects both fields to contain non-whitespace text.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct WebGlFingerprint {
    /// WebGL vendor string.
    pub vendor: String,
    /// WebGL renderer string.
    pub renderer: String,
}
56
/// TLS-related fingerprint surface.
///
/// When this surface is present, the `tls_ja3_populated` coherence rule
/// expects `ja3_hash` to contain non-whitespace text.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TlsFingerprint {
    /// JA3 hash for the observed TLS handshake.
    pub ja3_hash: String,
    /// Optional JA4 fingerprint.
    pub ja4: Option<String>,
}
65
/// Signal payload for normalized fingerprint snapshots.
///
/// The maps are `BTreeMap`s so that serialization emits keys in sorted order.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FingerprintSignals {
    /// User-Agent string.
    pub user_agent: String,
    /// Accept-Language header value.
    pub accept_language: String,
    /// Platform indicator.
    pub platform: String,
    /// Timezone identifier.
    pub timezone: String,
    /// Header-level snapshot values, keyed by header name.
    ///
    /// Coherence rules look header names up case-insensitively.
    pub headers: BTreeMap<String, String>,
    /// Boolean feature flags (for example `navigator.webdriver`).
    pub features: BTreeMap<String, bool>,
    /// Screen metrics.
    pub screen: ScreenFingerprint,
    /// WebGL surface (required for browser mode).
    pub webgl: Option<WebGlFingerprint>,
    /// TLS surface (required for HTTP mode).
    pub tls: Option<TlsFingerprint>,
}
88
/// Versioned normalized fingerprint snapshot across modes.
///
/// Fields marked `#[serde(default)]` may be omitted from the JSON input.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NormalizedFingerprintSnapshot {
    /// Snapshot schema version (`major.minor.patch`).
    pub schema_version: String,
    /// Stable unique snapshot id.
    pub snapshot_id: String,
    /// Capture mode.
    pub mode: SnapshotMode,
    /// RFC 3339 timestamp of capture.
    ///
    /// Replaced by a fixed placeholder during deterministic normalization
    /// when that option is enabled.
    pub captured_at: String,
    /// Fingerprint signal payload.
    pub signals: FingerprintSignals,
    /// Optional metadata for provenance and notes.
    #[serde(default)]
    pub metadata: BTreeMap<String, String>,
    /// Deprecated UA mirror retained for backward compatibility.
    ///
    /// When present it must equal `signals.user_agent`.
    #[serde(default)]
    pub legacy_user_agent: Option<String>,
    /// Deprecated JA3 mirror retained for backward compatibility.
    ///
    /// When present, `signals.tls` must exist and its `ja3_hash` must match.
    #[serde(default)]
    pub legacy_ja3_hash: Option<String>,
}
112
/// Compatibility validation error for normalized snapshots.
///
/// Returned by `validate_snapshot_compatibility` and the size-validation
/// helpers it calls.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum SnapshotCompatibilityError {
    /// Schema version is not parseable as semver-like `major.minor.patch`.
    ///
    /// Carries the offending version string.
    #[error("invalid schema version: {0}")]
    InvalidSchemaVersion(String),
    /// Snapshot major version is not supported by this reader.
    ///
    /// Carries the parsed major version.
    #[error("unsupported schema major version: {0}")]
    UnsupportedSchemaMajor(u64),
    /// A mode-required signal is missing.
    #[error("missing required signal '{signal}' for mode {mode:?}")]
    MissingModeSignal {
        /// Snapshot mode.
        mode: SnapshotMode,
        /// Required signal name.
        signal: &'static str,
    },
    /// Deprecated mirror field is inconsistent with canonical field.
    #[error("deprecated field '{field}' does not match canonical field")]
    LegacyFieldMismatch {
        /// Deprecated field name.
        field: &'static str,
    },
    /// Input validation failed due to size or structure constraints.
    ///
    /// Carries a static description of the limit that was exceeded.
    #[error("input validation failed: {0}")]
    InputValidation(&'static str),
}
140
/// Error returned when building deterministic snapshot bytes.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum SnapshotCollectionError {
    /// Snapshot failed compatibility validation.
    #[error("compatibility validation failed: {0}")]
    Compatibility(#[from] SnapshotCompatibilityError),
    /// Snapshot serialization failed.
    ///
    /// Also used when the serialized snapshot exceeds the maximum JSON size
    /// and when re-decoding normalized bytes fails during drift comparison.
    /// The payload is the rendered message of the underlying failure.
    #[error("serialization failed: {0}")]
    Serialization(String),
}
151
/// Kind of signal-level drift detected between baseline and candidate snapshots.
///
/// `Added` and `Removed` are produced for keys of JSON objects; every other
/// difference (including a value that differs in JSON type) is `Changed`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SnapshotSignalDriftKind {
    /// A signal path exists only in candidate.
    Added,
    /// A signal path exists only in baseline.
    Removed,
    /// A signal path exists in both snapshots but the value changed.
    Changed,
}
162
/// Focused signal-level drift entry.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SnapshotSignalDrift {
    /// Dot-path to the changed signal key, rooted at `signals`.
    pub path: String,
    /// Difference kind for this path.
    pub kind: SnapshotSignalDriftKind,
    /// Baseline value encoded as compact JSON when present.
    ///
    /// `None` for [`SnapshotSignalDriftKind::Added`] entries.
    pub baseline: Option<String>,
    /// Candidate value encoded as compact JSON when present.
    ///
    /// `None` for [`SnapshotSignalDriftKind::Removed`] entries.
    pub candidate: Option<String>,
}
175
/// Signal-focused drift report for baseline vs candidate snapshots.
///
/// Produced by `compare_snapshot_signal_drift`; an empty `diffs` list means
/// the two `signals` subtrees are identical.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SnapshotDriftReport {
    /// Signal-level differences discovered after deterministic normalization.
    pub diffs: Vec<SnapshotSignalDrift>,
}
182
/// Machine-readable coherence violation produced by a snapshot rule.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SnapshotCoherenceViolation {
    /// Stable identifier for the violated rule (for example
    /// `user_agent_header_match`).
    pub rule_id: String,
    /// Human-readable explanation of the mismatch.
    pub message: String,
    /// Dot-paths participating in the violation.
    pub paths: Vec<String>,
}
193
/// Result of evaluating all registered snapshot coherence rules.
///
/// The `Default` value is an empty report, i.e. no violations.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct SnapshotCoherenceReport {
    /// Violations returned by the active rule set.
    pub violations: Vec<SnapshotCoherenceViolation>,
}

impl SnapshotCoherenceReport {
    /// Return `true` when any coherence rule was violated.
    ///
    /// Equivalent to checking that `violations` is non-empty.
    #[must_use]
    pub const fn has_violations(&self) -> bool {
        !self.violations.is_empty()
    }
}
208
209impl SnapshotDriftReport {
210    /// Return `true` when any signal drift was detected.
211    #[must_use]
212    pub const fn has_drift(&self) -> bool {
213        !self.diffs.is_empty()
214    }
215
216    /// Render a focused, line-oriented diff for changed signal paths.
217    #[must_use]
218    pub fn render_focused_diff(&self) -> String {
219        if self.diffs.is_empty() {
220            return "no signal drift detected".to_string();
221        }
222
223        self.diffs
224            .iter()
225            .map(|entry| match entry.kind {
226                SnapshotSignalDriftKind::Added => format!(
227                    "{} added: {}",
228                    entry.path,
229                    entry.candidate.as_deref().unwrap_or("null")
230                ),
231                SnapshotSignalDriftKind::Removed => format!(
232                    "{} removed: {}",
233                    entry.path,
234                    entry.baseline.as_deref().unwrap_or("null")
235                ),
236                SnapshotSignalDriftKind::Changed => format!(
237                    "{} changed: {} -> {}",
238                    entry.path,
239                    entry.baseline.as_deref().unwrap_or("null"),
240                    entry.candidate.as_deref().unwrap_or("null")
241                ),
242            })
243            .collect::<Vec<_>>()
244            .join("\n")
245    }
246}
247
/// Switches that select which normalization steps run before a snapshot is
/// serialized deterministically.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SnapshotDeterminismOptions {
    /// Overwrite `captured_at` with a fixed placeholder timestamp.
    pub normalize_captured_at: bool,
    /// Drop metadata keys known to change between runs.
    pub strip_volatile_metadata: bool,
}

impl Default for SnapshotDeterminismOptions {
    /// Enable every normalization step.
    fn default() -> Self {
        let enabled = true;
        Self {
            normalize_captured_at: enabled,
            strip_volatile_metadata: enabled,
        }
    }
}
265
/// Placeholder written to `captured_at` when timestamp normalization is enabled.
const DETERMINISTIC_CAPTURED_AT: &str = "1970-01-01T00:00:00Z";
/// Metadata keys removed when volatile-metadata stripping is enabled.
///
/// Keys are matched exactly (case-sensitive) against the metadata map.
const VOLATILE_METADATA_KEYS: &[&str] = &[
    "capture_nonce",
    "generated_at",
    "request_id",
    "run_id",
    "session_id",
    "trace_id",
];
275
/// Signature shared by all coherence rules: inspect a snapshot and return a
/// violation when the rule is broken, or `None` when it holds.
type SnapshotCoherenceRule =
    fn(&NormalizedFingerprintSnapshot) -> Option<SnapshotCoherenceViolation>;

/// Rules evaluated by `evaluate_snapshot_coherence`.
///
/// Rules run in the order listed here, so violations appear in the report in
/// this order as well.
const SNAPSHOT_COHERENCE_RULES: &[SnapshotCoherenceRule] = &[
    rule_user_agent_header_matches,
    rule_accept_language_header_matches,
    rule_browser_webdriver_disabled,
    rule_webgl_fields_populated,
    rule_tls_fields_populated,
];
286
287/// Normalize a snapshot in-place for deterministic collection.
288pub fn normalize_snapshot_for_determinism(
289    snapshot: &mut NormalizedFingerprintSnapshot,
290    options: &SnapshotDeterminismOptions,
291) {
292    if options.normalize_captured_at {
293        snapshot.captured_at = DETERMINISTIC_CAPTURED_AT.to_string();
294    }
295
296    if options.strip_volatile_metadata {
297        for key in VOLATILE_METADATA_KEYS {
298            snapshot.metadata.remove(*key);
299        }
300    }
301}
302
303/// Serialize a snapshot into deterministic JSON bytes.
304///
305/// The function first validates snapshot compatibility, then applies
306/// deterministic normalization rules, and finally serializes with a stable
307/// field order (provided by struct declaration order + `BTreeMap` keys).
308///
309/// # Errors
310///
311/// Returns [`SnapshotCollectionError`] when snapshot compatibility validation
312/// fails or when JSON serialization fails.
313pub fn collect_deterministic_snapshot_bytes(
314    snapshot: &NormalizedFingerprintSnapshot,
315    options: &SnapshotDeterminismOptions,
316) -> Result<Vec<u8>, SnapshotCollectionError> {
317    validate_snapshot_compatibility(snapshot)?;
318
319    let mut normalized = snapshot.clone();
320    normalize_snapshot_for_determinism(&mut normalized, options);
321
322    let bytes = serde_json::to_vec(&normalized)
323        .map_err(|error| SnapshotCollectionError::Serialization(error.to_string()))?;
324
325    if bytes.len() > MAX_SNAPSHOT_JSON_BYTES {
326        return Err(SnapshotCollectionError::Serialization(
327            "snapshot JSON exceeds maximum allowed size".to_string(),
328        ));
329    }
330
331    Ok(bytes)
332}
333
334/// Compare baseline and candidate snapshots for deterministic, signal-focused drift.
335///
336/// The comparison validates both snapshots, applies deterministic normalization,
337/// and reports only differences under the `signals` subtree.
338///
339/// # Errors
340///
341/// Returns [`SnapshotCollectionError`] when either snapshot fails compatibility
342/// validation or deterministic serialization.
343pub fn compare_snapshot_signal_drift(
344    baseline: &NormalizedFingerprintSnapshot,
345    candidate: &NormalizedFingerprintSnapshot,
346    options: &SnapshotDeterminismOptions,
347) -> Result<SnapshotDriftReport, SnapshotCollectionError> {
348    let baseline_bytes = collect_deterministic_snapshot_bytes(baseline, options)?;
349    let candidate_bytes = collect_deterministic_snapshot_bytes(candidate, options)?;
350
351    let baseline_normalized: NormalizedFingerprintSnapshot =
352        serde_json::from_slice(&baseline_bytes)
353            .map_err(|error| SnapshotCollectionError::Serialization(error.to_string()))?;
354    let candidate_normalized: NormalizedFingerprintSnapshot =
355        serde_json::from_slice(&candidate_bytes)
356            .map_err(|error| SnapshotCollectionError::Serialization(error.to_string()))?;
357
358    let baseline_signals = serde_json::to_value(&baseline_normalized.signals)
359        .map_err(|error| SnapshotCollectionError::Serialization(error.to_string()))?;
360    let candidate_signals = serde_json::to_value(&candidate_normalized.signals)
361        .map_err(|error| SnapshotCollectionError::Serialization(error.to_string()))?;
362
363    let mut diffs = Vec::new();
364    collect_signal_diffs("signals", &baseline_signals, &candidate_signals, &mut diffs);
365
366    Ok(SnapshotDriftReport { diffs })
367}
368
369/// Evaluate registered coherence rules across normalized snapshot fields.
370#[must_use]
371pub fn evaluate_snapshot_coherence(
372    snapshot: &NormalizedFingerprintSnapshot,
373) -> SnapshotCoherenceReport {
374    let violations = SNAPSHOT_COHERENCE_RULES
375        .iter()
376        .filter_map(|rule| rule(snapshot))
377        .collect();
378
379    SnapshotCoherenceReport { violations }
380}
381
382fn collect_signal_diffs(
383    path: &str,
384    baseline: &serde_json::Value,
385    candidate: &serde_json::Value,
386    diffs: &mut Vec<SnapshotSignalDrift>,
387) {
388    match (baseline, candidate) {
389        (serde_json::Value::Object(left), serde_json::Value::Object(right)) => {
390            let keys: BTreeSet<&String> = left.keys().chain(right.keys()).collect();
391            for key in keys {
392                let next_path = format!("{path}.{key}");
393                match (left.get(key), right.get(key)) {
394                    (Some(left_value), Some(right_value)) => {
395                        collect_signal_diffs(&next_path, left_value, right_value, diffs);
396                    }
397                    (Some(left_value), None) => {
398                        diffs.push(SnapshotSignalDrift {
399                            path: next_path,
400                            kind: SnapshotSignalDriftKind::Removed,
401                            baseline: Some(left_value.to_string()),
402                            candidate: None,
403                        });
404                    }
405                    (None, Some(right_value)) => {
406                        diffs.push(SnapshotSignalDrift {
407                            path: next_path,
408                            kind: SnapshotSignalDriftKind::Added,
409                            baseline: None,
410                            candidate: Some(right_value.to_string()),
411                        });
412                    }
413                    (None, None) => {}
414                }
415            }
416        }
417        _ => {
418            if baseline != candidate {
419                diffs.push(SnapshotSignalDrift {
420                    path: path.to_string(),
421                    kind: SnapshotSignalDriftKind::Changed,
422                    baseline: Some(baseline.to_string()),
423                    candidate: Some(candidate.to_string()),
424                });
425            }
426        }
427    }
428}
429
430fn parse_schema_major(version: &str) -> Result<u64, SnapshotCompatibilityError> {
431    let parts = version.split('.').collect::<Vec<_>>();
432    let [major, minor, patch] = parts.as_slice() else {
433        return Err(SnapshotCompatibilityError::InvalidSchemaVersion(
434            version.to_string(),
435        ));
436    };
437
438    if major.parse::<u64>().is_err()
439        || minor.parse::<u64>().is_err()
440        || patch.parse::<u64>().is_err()
441    {
442        return Err(SnapshotCompatibilityError::InvalidSchemaVersion(
443            version.to_string(),
444        ));
445    }
446
447    major
448        .parse::<u64>()
449        .map_err(|_| SnapshotCompatibilityError::InvalidSchemaVersion(version.to_string()))
450}
451
452const fn validate_max_len(
453    value: &str,
454    max_len: usize,
455    message: &'static str,
456) -> Result<(), SnapshotCompatibilityError> {
457    if value.len() > max_len {
458        return Err(SnapshotCompatibilityError::InputValidation(message));
459    }
460
461    Ok(())
462}
463
464fn validate_signal_text_fields(
465    signals: &FingerprintSignals,
466) -> Result<(), SnapshotCompatibilityError> {
467    validate_max_len(
468        &signals.user_agent,
469        MAX_STRING_FIELD_BYTES,
470        "signals.user_agent exceeds maximum length",
471    )?;
472    validate_max_len(
473        &signals.accept_language,
474        MAX_STRING_FIELD_BYTES,
475        "signals.accept_language exceeds maximum length",
476    )?;
477    validate_max_len(
478        &signals.platform,
479        MAX_STRING_FIELD_BYTES,
480        "signals.platform exceeds maximum length",
481    )?;
482    validate_max_len(
483        &signals.timezone,
484        MAX_STRING_FIELD_BYTES,
485        "signals.timezone exceeds maximum length",
486    )
487}
488
489fn validate_signal_collections(
490    signals: &FingerprintSignals,
491) -> Result<(), SnapshotCompatibilityError> {
492    if signals.headers.len() > MAX_HEADERS_ENTRIES {
493        return Err(SnapshotCompatibilityError::InputValidation(
494            "signals.headers exceeds maximum entries",
495        ));
496    }
497    if signals.features.len() > MAX_FEATURES_ENTRIES {
498        return Err(SnapshotCompatibilityError::InputValidation(
499            "signals.features exceeds maximum entries",
500        ));
501    }
502
503    for (key, value) in &signals.headers {
504        validate_max_len(
505            key,
506            MAX_STRING_FIELD_BYTES,
507            "header key exceeds maximum length",
508        )?;
509        validate_max_len(
510            value,
511            MAX_STRING_FIELD_BYTES,
512            "header value exceeds maximum length",
513        )?;
514    }
515
516    Ok(())
517}
518
519fn validate_optional_signal_fields(
520    signals: &FingerprintSignals,
521) -> Result<(), SnapshotCompatibilityError> {
522    if let Some(webgl) = &signals.webgl {
523        validate_max_len(
524            &webgl.vendor,
525            MAX_STRING_FIELD_BYTES,
526            "signals.webgl.vendor exceeds maximum length",
527        )?;
528        validate_max_len(
529            &webgl.renderer,
530            MAX_STRING_FIELD_BYTES,
531            "signals.webgl.renderer exceeds maximum length",
532        )?;
533    }
534
535    if let Some(tls) = &signals.tls {
536        validate_max_len(
537            &tls.ja3_hash,
538            MAX_HASH_FIELD_BYTES,
539            "signals.tls.ja3_hash exceeds maximum length",
540        )?;
541        if let Some(ja4) = &tls.ja4
542            && ja4.len() > MAX_HASH_FIELD_BYTES
543        {
544            return Err(SnapshotCompatibilityError::InputValidation(
545                "signals.tls.ja4 exceeds maximum length",
546            ));
547        }
548    }
549
550    Ok(())
551}
552
553fn validate_metadata_fields(
554    snapshot: &NormalizedFingerprintSnapshot,
555) -> Result<(), SnapshotCompatibilityError> {
556    if snapshot.metadata.len() > MAX_METADATA_ENTRIES {
557        return Err(SnapshotCompatibilityError::InputValidation(
558            "metadata exceeds maximum entries",
559        ));
560    }
561
562    for (key, value) in &snapshot.metadata {
563        if key.len() > MAX_HASH_FIELD_BYTES || value.len() > MAX_STRING_FIELD_BYTES {
564            return Err(SnapshotCompatibilityError::InputValidation(
565                "metadata entry exceeds maximum size",
566            ));
567        }
568    }
569
570    Ok(())
571}
572
573const fn validate_legacy_field_sizes(
574    snapshot: &NormalizedFingerprintSnapshot,
575) -> Result<(), SnapshotCompatibilityError> {
576    if let Some(legacy_ua) = &snapshot.legacy_user_agent
577        && legacy_ua.len() > MAX_STRING_FIELD_BYTES
578    {
579        return Err(SnapshotCompatibilityError::InputValidation(
580            "legacy_user_agent exceeds maximum length",
581        ));
582    }
583    if let Some(legacy_ja3) = &snapshot.legacy_ja3_hash
584        && legacy_ja3.len() > MAX_HASH_FIELD_BYTES
585    {
586        return Err(SnapshotCompatibilityError::InputValidation(
587            "legacy_ja3_hash exceeds maximum length",
588        ));
589    }
590
591    Ok(())
592}
593
594/// Validate input sizes for a snapshot to prevent resource exhaustion.
595///
596/// # Errors
597///
598/// Returns [`SnapshotCompatibilityError::InputValidation`] when any size limit is exceeded.
599fn validate_snapshot_input_sizes(
600    snapshot: &NormalizedFingerprintSnapshot,
601) -> Result<(), SnapshotCompatibilityError> {
602    validate_max_len(
603        &snapshot.schema_version,
604        MAX_STRING_FIELD_BYTES,
605        "schema_version exceeds maximum length",
606    )?;
607    validate_max_len(
608        &snapshot.snapshot_id,
609        MAX_HASH_FIELD_BYTES,
610        "snapshot_id exceeds maximum length",
611    )?;
612    validate_max_len(
613        &snapshot.captured_at,
614        MAX_STRING_FIELD_BYTES,
615        "captured_at exceeds maximum length",
616    )?;
617
618    let signals = &snapshot.signals;
619    validate_signal_text_fields(signals)?;
620    validate_signal_collections(signals)?;
621    validate_optional_signal_fields(signals)?;
622    validate_metadata_fields(snapshot)?;
623    validate_legacy_field_sizes(snapshot)
624}
625
626fn signal_header<'a>(snapshot: &'a NormalizedFingerprintSnapshot, name: &str) -> Option<&'a str> {
627    snapshot
628        .signals
629        .headers
630        .iter()
631        .find(|(key, _)| key.eq_ignore_ascii_case(name))
632        .map(|(_, value)| value.as_str())
633}
634
635fn mismatch_violation(
636    rule_id: &'static str,
637    message: impl Into<String>,
638    paths: &[&str],
639) -> SnapshotCoherenceViolation {
640    SnapshotCoherenceViolation {
641        rule_id: rule_id.to_string(),
642        message: message.into(),
643        paths: paths.iter().map(|path| (*path).to_string()).collect(),
644    }
645}
646
647fn rule_user_agent_header_matches(
648    snapshot: &NormalizedFingerprintSnapshot,
649) -> Option<SnapshotCoherenceViolation> {
650    let header = signal_header(snapshot, "user-agent")?;
651    if header == snapshot.signals.user_agent {
652        return None;
653    }
654
655    Some(mismatch_violation(
656        "user_agent_header_match",
657        "signals.user_agent does not match signals.headers.user-agent",
658        &["signals.user_agent", "signals.headers.user-agent"],
659    ))
660}
661
662fn rule_accept_language_header_matches(
663    snapshot: &NormalizedFingerprintSnapshot,
664) -> Option<SnapshotCoherenceViolation> {
665    let header = signal_header(snapshot, "accept-language")?;
666    if header == snapshot.signals.accept_language {
667        return None;
668    }
669
670    Some(mismatch_violation(
671        "accept_language_header_match",
672        "signals.accept_language does not match signals.headers.accept-language",
673        &["signals.accept_language", "signals.headers.accept-language"],
674    ))
675}
676
677fn rule_browser_webdriver_disabled(
678    snapshot: &NormalizedFingerprintSnapshot,
679) -> Option<SnapshotCoherenceViolation> {
680    if snapshot.mode == SnapshotMode::Http {
681        return None;
682    }
683
684    if snapshot
685        .signals
686        .features
687        .get("navigator.webdriver")
688        .copied()
689        != Some(true)
690    {
691        return None;
692    }
693
694    Some(mismatch_violation(
695        "navigator_webdriver_disabled",
696        "browser-oriented snapshots should not report navigator.webdriver=true",
697        &["mode", "signals.features.navigator.webdriver"],
698    ))
699}
700
701fn rule_webgl_fields_populated(
702    snapshot: &NormalizedFingerprintSnapshot,
703) -> Option<SnapshotCoherenceViolation> {
704    let webgl = snapshot.signals.webgl.as_ref()?;
705    if !webgl.vendor.trim().is_empty() && !webgl.renderer.trim().is_empty() {
706        return None;
707    }
708
709    Some(mismatch_violation(
710        "webgl_fields_populated",
711        "signals.webgl vendor and renderer must both be populated when webgl is present",
712        &["signals.webgl.vendor", "signals.webgl.renderer"],
713    ))
714}
715
716fn rule_tls_fields_populated(
717    snapshot: &NormalizedFingerprintSnapshot,
718) -> Option<SnapshotCoherenceViolation> {
719    let tls = snapshot.signals.tls.as_ref()?;
720    if !tls.ja3_hash.trim().is_empty() {
721        return None;
722    }
723
724    Some(mismatch_violation(
725        "tls_ja3_populated",
726        "signals.tls.ja3_hash must be populated when tls is present",
727        &["signals.tls.ja3_hash"],
728    ))
729}
730
731/// Validate normalized snapshot compatibility rules across modes and versions.
732///
733/// Current compatibility contract:
734/// - supports schema major version `1`
735/// - requires `signals.tls` for [`SnapshotMode::Http`]
736/// - requires `signals.webgl` for [`SnapshotMode::Browser`]
737/// - requires deprecated mirror fields, when present, to match canonical fields
738///
739/// # Errors
740///
741/// Returns [`SnapshotCompatibilityError`] when schema version is invalid or
742/// unsupported, required mode-specific signals are missing, or legacy mirror
743/// fields do not match canonical signal values.
744pub fn validate_snapshot_compatibility(
745    snapshot: &NormalizedFingerprintSnapshot,
746) -> Result<(), SnapshotCompatibilityError> {
747    // Validate input sizes first to prevent resource exhaustion
748    validate_snapshot_input_sizes(snapshot)?;
749
750    let major = parse_schema_major(&snapshot.schema_version)?;
751    if major != 1 {
752        return Err(SnapshotCompatibilityError::UnsupportedSchemaMajor(major));
753    }
754
755    match snapshot.mode {
756        SnapshotMode::Http => {
757            if snapshot.signals.tls.is_none() {
758                return Err(SnapshotCompatibilityError::MissingModeSignal {
759                    mode: snapshot.mode,
760                    signal: "tls",
761                });
762            }
763        }
764        SnapshotMode::Browser => {
765            if snapshot.signals.webgl.is_none() {
766                return Err(SnapshotCompatibilityError::MissingModeSignal {
767                    mode: snapshot.mode,
768                    signal: "webgl",
769                });
770            }
771        }
772        SnapshotMode::Hybrid => {}
773    }
774
775    if let Some(legacy_ua) = snapshot.legacy_user_agent.as_deref()
776        && legacy_ua != snapshot.signals.user_agent
777    {
778        return Err(SnapshotCompatibilityError::LegacyFieldMismatch {
779            field: "legacy_user_agent",
780        });
781    }
782
783    if let Some(legacy_ja3) = snapshot.legacy_ja3_hash.as_deref() {
784        let Some(tls) = snapshot.signals.tls.as_ref() else {
785            return Err(SnapshotCompatibilityError::LegacyFieldMismatch {
786                field: "legacy_ja3_hash",
787            });
788        };
789        if legacy_ja3 != tls.ja3_hash {
790            return Err(SnapshotCompatibilityError::LegacyFieldMismatch {
791                field: "legacy_ja3_hash",
792            });
793        }
794    }
795
796    Ok(())
797}
798
799#[cfg(test)]
800#[allow(clippy::expect_used)]
801mod tests {
802    use super::*;
803
804    #[allow(clippy::missing_const_for_fn)]
805    fn parse_snapshot(path: &str) -> NormalizedFingerprintSnapshot {
806        serde_json::from_str::<NormalizedFingerprintSnapshot>(path)
807            .expect("example snapshot should deserialize")
808    }
809
810    #[test]
811    fn example_http_snapshot_is_compatible() {
812        let snap = parse_snapshot(include_str!(
813            "../docs/examples/fingerprint-snapshot-v1-http.json"
814        ));
815        assert!(validate_snapshot_compatibility(&snap).is_ok());
816    }
817
818    #[test]
819    fn example_browser_snapshot_is_compatible() {
820        let snap = parse_snapshot(include_str!(
821            "../docs/examples/fingerprint-snapshot-v1-browser.json"
822        ));
823        assert!(validate_snapshot_compatibility(&snap).is_ok());
824    }
825
826    #[test]
827    fn coherence_report_is_clean_for_example_browser_snapshot() {
828        let snap = parse_snapshot(include_str!(
829            "../docs/examples/fingerprint-snapshot-v1-browser.json"
830        ));
831
832        let report = evaluate_snapshot_coherence(&snap);
833
834        assert!(!report.has_violations());
835    }
836
837    #[test]
838    fn coherence_report_flags_cross_field_mismatches() {
839        let mut snap = parse_snapshot(include_str!(
840            "../docs/examples/fingerprint-snapshot-v1-browser.json"
841        ));
842        snap.signals
843            .headers
844            .insert("user-agent".to_string(), "different-user-agent".to_string());
845        snap.signals
846            .headers
847            .insert("accept-language".to_string(), "fr-FR,fr;q=0.9".to_string());
848        snap.signals
849            .features
850            .insert("navigator.webdriver".to_string(), true);
851
852        let report = evaluate_snapshot_coherence(&snap);
853
854        assert!(report.has_violations());
855        assert_eq!(report.violations.len(), 3);
856        let ids = report
857            .violations
858            .iter()
859            .map(|violation| violation.rule_id.as_str())
860            .collect::<Vec<_>>();
861        assert_eq!(
862            ids,
863            vec![
864                "user_agent_header_match",
865                "accept_language_header_match",
866                "navigator_webdriver_disabled"
867            ]
868        );
869    }
870
871    #[test]
872    fn http_mode_requires_tls_signal() {
873        let mut snap = parse_snapshot(include_str!(
874            "../docs/examples/fingerprint-snapshot-v1-http.json"
875        ));
876        snap.signals.tls = None;
877        let err = validate_snapshot_compatibility(&snap).expect_err("must fail without tls");
878        assert_eq!(
879            err,
880            SnapshotCompatibilityError::MissingModeSignal {
881                mode: SnapshotMode::Http,
882                signal: "tls"
883            }
884        );
885    }
886
887    #[test]
888    fn browser_mode_requires_webgl_signal() {
889        let mut snap = parse_snapshot(include_str!(
890            "../docs/examples/fingerprint-snapshot-v1-browser.json"
891        ));
892        snap.signals.webgl = None;
893        let err = validate_snapshot_compatibility(&snap).expect_err("must fail without webgl");
894        assert_eq!(
895            err,
896            SnapshotCompatibilityError::MissingModeSignal {
897                mode: SnapshotMode::Browser,
898                signal: "webgl"
899            }
900        );
901    }
902
903    #[test]
904    fn unsupported_schema_major_fails_compatibility() {
905        let mut snap = parse_snapshot(include_str!(
906            "../docs/examples/fingerprint-snapshot-v1-http.json"
907        ));
908        snap.schema_version = "2.0.0".to_string();
909        let err = validate_snapshot_compatibility(&snap).expect_err("must fail unsupported major");
910        assert_eq!(err, SnapshotCompatibilityError::UnsupportedSchemaMajor(2));
911    }
912
913    #[test]
914    fn schema_version_requires_exact_semver_triplet() {
915        let mut snap = parse_snapshot(include_str!(
916            "../docs/examples/fingerprint-snapshot-v1-http.json"
917        ));
918        snap.schema_version = "1.0.0.1".to_string();
919
920        let err = validate_snapshot_compatibility(&snap).expect_err("must reject extra segments");
921        assert_eq!(
922            err,
923            SnapshotCompatibilityError::InvalidSchemaVersion("1.0.0.1".to_string())
924        );
925    }
926
927    #[test]
928    fn deterministic_collector_produces_identical_bytes_for_volatile_differences() {
929        let mut first = parse_snapshot(include_str!(
930            "../docs/examples/fingerprint-snapshot-v1-http.json"
931        ));
932        first.captured_at = "2026-04-26T23:11:11Z".to_string();
933        first
934            .metadata
935            .insert("trace_id".to_string(), "trace-a".to_string());
936        first
937            .metadata
938            .insert("request_id".to_string(), "request-a".to_string());
939
940        let mut second = parse_snapshot(include_str!(
941            "../docs/examples/fingerprint-snapshot-v1-http.json"
942        ));
943        second.captured_at = "2026-04-27T01:22:33Z".to_string();
944        second
945            .metadata
946            .insert("request_id".to_string(), "request-b".to_string());
947        second
948            .metadata
949            .insert("trace_id".to_string(), "trace-b".to_string());
950
951        let options = SnapshotDeterminismOptions::default();
952        let left = collect_deterministic_snapshot_bytes(&first, &options).expect("must serialize");
953        let right =
954            collect_deterministic_snapshot_bytes(&second, &options).expect("must serialize");
955        assert_eq!(left, right);
956    }
957
958    #[test]
959    fn deterministic_collector_keeps_nonvolatile_metadata() {
960        let mut snap = parse_snapshot(include_str!(
961            "../docs/examples/fingerprint-snapshot-v1-http.json"
962        ));
963        snap.metadata
964            .insert("collector".to_string(), "charon-v2".to_string());
965        snap.metadata
966            .insert("trace_id".to_string(), "volatile".to_string());
967
968        let options = SnapshotDeterminismOptions::default();
969        let bytes = collect_deterministic_snapshot_bytes(&snap, &options).expect("must collect");
970        let collected: NormalizedFingerprintSnapshot =
971            serde_json::from_slice(&bytes).expect("bytes should deserialize");
972
973        assert_eq!(collected.captured_at, DETERMINISTIC_CAPTURED_AT);
974        assert_eq!(
975            collected.metadata.get("collector"),
976            Some(&"charon-v2".to_string())
977        );
978        assert!(!collected.metadata.contains_key("trace_id"));
979    }
980
981    #[test]
982    fn deterministic_collector_rejects_incompatible_snapshot() {
983        let mut snap = parse_snapshot(include_str!(
984            "../docs/examples/fingerprint-snapshot-v1-browser.json"
985        ));
986        snap.signals.webgl = None;
987
988        let options = SnapshotDeterminismOptions::default();
989        let err = collect_deterministic_snapshot_bytes(&snap, &options)
990            .expect_err("incompatible snapshot must fail");
991
992        assert_eq!(
993            err,
994            SnapshotCollectionError::Compatibility(SnapshotCompatibilityError::MissingModeSignal {
995                mode: SnapshotMode::Browser,
996                signal: "webgl"
997            })
998        );
999    }
1000
1001    #[test]
1002    fn compare_snapshot_signal_drift_reports_focused_paths() {
1003        let baseline = parse_snapshot(include_str!(
1004            "../docs/examples/fingerprint-snapshot-v1-http.json"
1005        ));
1006        let mut candidate = baseline.clone();
1007        candidate.signals.user_agent = "Mozilla/5.0 (X11; Linux x86_64)".to_string();
1008        candidate.legacy_user_agent = Some(candidate.signals.user_agent.clone());
1009        candidate
1010            .signals
1011            .features
1012            .insert("new_flag".to_string(), true);
1013
1014        let report = compare_snapshot_signal_drift(
1015            &baseline,
1016            &candidate,
1017            &SnapshotDeterminismOptions::default(),
1018        )
1019        .expect("drift comparison must succeed");
1020
1021        assert!(report.has_drift());
1022        assert!(
1023            report
1024                .diffs
1025                .iter()
1026                .any(|d| d.path == "signals.user_agent"
1027                    && d.kind == SnapshotSignalDriftKind::Changed)
1028        );
1029        assert!(
1030            report
1031                .diffs
1032                .iter()
1033                .any(|d| d.path == "signals.features.new_flag"
1034                    && d.kind == SnapshotSignalDriftKind::Added)
1035        );
1036    }
1037
1038    #[test]
1039    fn compare_snapshot_signal_drift_ignores_volatile_only_changes() {
1040        let mut baseline = parse_snapshot(include_str!(
1041            "../docs/examples/fingerprint-snapshot-v1-http.json"
1042        ));
1043        baseline.captured_at = "2026-04-26T00:00:00Z".to_string();
1044        baseline
1045            .metadata
1046            .insert("trace_id".to_string(), "trace-a".to_string());
1047
1048        let mut candidate = baseline.clone();
1049        candidate.captured_at = "2026-04-27T00:00:00Z".to_string();
1050        candidate
1051            .metadata
1052            .insert("trace_id".to_string(), "trace-b".to_string());
1053
1054        let report = compare_snapshot_signal_drift(
1055            &baseline,
1056            &candidate,
1057            &SnapshotDeterminismOptions::default(),
1058        )
1059        .expect("drift comparison must succeed");
1060
1061        assert!(!report.has_drift());
1062    }
1063
1064    #[test]
1065    fn reject_excessively_long_user_agent() {
1066        let mut snapshot = parse_snapshot(include_str!(
1067            "../docs/examples/fingerprint-snapshot-v1-http.json"
1068        ));
1069        snapshot.signals.user_agent = "A".repeat(MAX_STRING_FIELD_BYTES + 1);
1070
1071        let result = validate_snapshot_compatibility(&snapshot);
1072        assert!(result.is_err(), "should reject excessively long user_agent");
1073        assert!(matches!(
1074            result,
1075            Err(SnapshotCompatibilityError::InputValidation(_))
1076        ));
1077    }
1078
1079    #[test]
1080    fn reject_excessively_long_platform() {
1081        let mut snapshot = parse_snapshot(include_str!(
1082            "../docs/examples/fingerprint-snapshot-v1-http.json"
1083        ));
1084        snapshot.signals.platform = "B".repeat(MAX_STRING_FIELD_BYTES + 1);
1085
1086        let result = validate_snapshot_compatibility(&snapshot);
1087        assert!(result.is_err(), "should reject excessively long platform");
1088    }
1089
1090    #[test]
1091    fn reject_excessive_headers_count() {
1092        let mut snapshot = parse_snapshot(include_str!(
1093            "../docs/examples/fingerprint-snapshot-v1-http.json"
1094        ));
1095        for i in 0..=MAX_HEADERS_ENTRIES {
1096            snapshot
1097                .signals
1098                .headers
1099                .insert(format!("X-Custom-{i}"), "value".to_string());
1100        }
1101
1102        let result = validate_snapshot_compatibility(&snapshot);
1103        assert!(result.is_err(), "should reject excessive headers count");
1104    }
1105
1106    #[test]
1107    fn reject_excessive_metadata_count() {
1108        let mut snapshot = parse_snapshot(include_str!(
1109            "../docs/examples/fingerprint-snapshot-v1-http.json"
1110        ));
1111        for i in 0..=MAX_METADATA_ENTRIES {
1112            snapshot
1113                .metadata
1114                .insert(format!("key_{i}"), "value".to_string());
1115        }
1116
1117        let result = validate_snapshot_compatibility(&snapshot);
1118        assert!(result.is_err(), "should reject excessive metadata count");
1119    }
1120
1121    #[test]
1122    fn reject_oversized_webgl_vendor() {
1123        let mut snapshot = parse_snapshot(include_str!(
1124            "../docs/examples/fingerprint-snapshot-v1-http.json"
1125        ));
1126        if let Some(webgl) = snapshot.signals.webgl.as_mut() {
1127            webgl.vendor = "X".repeat(MAX_STRING_FIELD_BYTES + 1);
1128        }
1129
1130        let result = validate_snapshot_compatibility(&snapshot);
1131        assert!(result.is_err(), "should reject oversized webgl vendor");
1132    }
1133
1134    #[test]
1135    fn reject_oversized_ja3_hash() {
1136        let mut snapshot = parse_snapshot(include_str!(
1137            "../docs/examples/fingerprint-snapshot-v1-http.json"
1138        ));
1139        if let Some(tls) = snapshot.signals.tls.as_mut() {
1140            tls.ja3_hash = "X".repeat(MAX_HASH_FIELD_BYTES + 1);
1141        }
1142
1143        let result = validate_snapshot_compatibility(&snapshot);
1144        assert!(result.is_err(), "should reject oversized ja3_hash");
1145    }
1146
1147    #[test]
1148    fn reject_oversized_schema_version() {
1149        let mut snapshot = parse_snapshot(include_str!(
1150            "../docs/examples/fingerprint-snapshot-v1-http.json"
1151        ));
1152        snapshot.schema_version = "1".repeat(MAX_STRING_FIELD_BYTES + 1);
1153
1154        let result = validate_snapshot_compatibility(&snapshot);
1155        assert!(result.is_err(), "should reject oversized schema_version");
1156    }
1157
1158    #[test]
1159    fn accept_valid_snapshot_with_max_sizes() {
1160        let mut snapshot = parse_snapshot(include_str!(
1161            "../docs/examples/fingerprint-snapshot-v1-http.json"
1162        ));
1163        // Fill fields to max allowed sizes
1164        snapshot.signals.user_agent = "A".repeat(MAX_STRING_FIELD_BYTES);
1165        snapshot.signals.platform = "B".repeat(MAX_STRING_FIELD_BYTES - 1);
1166
1167        let result = validate_snapshot_compatibility(&snapshot);
1168        assert!(result.is_ok(), "should accept snapshot at max limits");
1169    }
1170
1171    #[test]
1172    fn reject_excessive_features_count() {
1173        let mut snapshot = parse_snapshot(include_str!(
1174            "../docs/examples/fingerprint-snapshot-v1-http.json"
1175        ));
1176        for i in 0..=MAX_FEATURES_ENTRIES {
1177            snapshot
1178                .signals
1179                .features
1180                .insert(format!("feature_{i}"), false);
1181        }
1182
1183        let result = validate_snapshot_compatibility(&snapshot);
1184        assert!(result.is_err(), "should reject excessive features count");
1185    }
1186
1187    #[test]
1188    fn reject_large_header_value() {
1189        let mut snapshot = parse_snapshot(include_str!(
1190            "../docs/examples/fingerprint-snapshot-v1-http.json"
1191        ));
1192        snapshot.signals.headers.insert(
1193            "X-Large".to_string(),
1194            "V".repeat(MAX_STRING_FIELD_BYTES + 1),
1195        );
1196
1197        let result = validate_snapshot_compatibility(&snapshot);
1198        assert!(result.is_err(), "should reject large header value");
1199    }
1200
1201    #[test]
1202    fn deserialize_malformed_json_fails_gracefully() {
1203        let malformed = "{invalid json}";
1204        let result: Result<NormalizedFingerprintSnapshot, _> = serde_json::from_str(malformed);
1205        assert!(result.is_err(), "should fail on malformed JSON");
1206        // Error should not expose sensitive information
1207        let err_msg = result
1208            .expect_err("malformed JSON input must fail to deserialize")
1209            .to_string();
1210        assert!(!err_msg.contains("secret"), "error should not leak secrets");
1211    }
1212}