Skip to main content

stygian_charon/pow_profile/
profile.rs

//! Proof-of-work capability profile schema (T93).
//!
//! This submodule owns the **schema** for the `PoW` capability
//! profile — the aggregated telemetry that the scorer and the
//! policy mapper consume. The schema is intentionally
//! additive and stable: every field has a documented default
//! and the serialisation shape is covered by round-trip tests
//! in `mod.rs`.
//!
//! ## What a profile captures
//!
//! A [`PowCapabilityProfile`] is the **aggregated observation**
//! for one `(domain, target_class, vendor_family)` triple over a
//! fixed sampling window. It tracks:
//!
//! - **Solve latency** (p50 + p95 in milliseconds) for solved
//!   challenges.
//! - **Solve success rate** (solved / total).
//! - **Retry profile** (cumulative retries across all attempts
//!   and the average per attempt).
//! - **Failure modes** (per-mode counts — see
//!   [`PowFailureMode`]).
//!
//! Two profiles with the same key can be
//! [`merged`][PowCapabilityProfile::merge_profile] into a single
//! aggregate without losing any field.
//!
//! ## Sampling window
//!
//! The default sampling window is
//! [`DEFAULT_SAMPLE_WINDOW_SECS`]
//! seconds (one hour). The window is stored on the profile
//! itself (`observation_window_secs`) so a profile that was
//! built over a custom window still documents its own horizon.
//! The scorer treats `observation_window_secs == 0` as
//! "unknown window" and falls back to the documented default
//! for sparse-telemetry scoring.
38
39use std::collections::BTreeMap;
40use std::time::{SystemTime, UNIX_EPOCH};
41
42use serde::{Deserialize, Serialize};
43
44use crate::types::TargetClass;
45use crate::vendor_classifier::VendorId;
46
/// Default sampling window for a [`PowCapabilityProfile`], in seconds.
///
/// One hour is short enough that a stale profile decays before
/// it can mis-route the runner, and long enough to span a
/// typical scraping session. The window is exposed both as a
/// constant (this value) and as a field on the profile so a
/// custom-window profile documents its own horizon.
pub const DEFAULT_SAMPLE_WINDOW_SECS: u64 = 3_600;

/// Fallback timestamp used by `current_unix_secs` when the
/// wall clock cannot be expressed as a duration since the Unix
/// epoch. A zero-second `recorded_at_unix_secs` is
/// distinguishable from a real timestamp while still being a
/// valid serialisation.
const ZERO_FALLBACK_UNIX_SECS: u64 = 0;
61
/// Failure mode a `PoW` solve attempt can end in.
///
/// The taxonomy is small and stable — every variant maps to a
/// well-understood terminal state observed by the runner or the
/// T83 challenge feedback loop. The wire label is
/// `snake_case` and a `severity_weight` is provided for the
/// scorer (higher = worse for the aggregate score).
///
/// The derived `Ord` follows declaration order, and the enum is
/// used as the key of the `BTreeMap` failure histogram on
/// [`PowCapabilityProfile`], so the declaration order is also the
/// iteration order of that histogram.
///
/// # Example
///
/// ```
/// use stygian_charon::pow_profile::PowFailureMode;
///
/// assert_eq!(PowFailureMode::Captcha.label(), "captcha");
/// assert!(PowFailureMode::Captcha.severity_weight() > PowFailureMode::Timeout.severity_weight());
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PowFailureMode {
    /// The token was rejected as invalid by the vendor.
    TokenInvalid,
    /// The nonce was already used (replay detected by T91).
    NonceReplayed,
    /// The solve attempt timed out before completion.
    Timeout,
    /// The vendor blocked the request outright (`403`/`429`).
    Blocked,
    /// The vendor demanded a captcha the runner cannot solve.
    Captcha,
    /// Any other observed failure mode.
    Other,
}
94
95impl PowFailureMode {
96    /// Stable, lower-case wire label.
97    #[must_use]
98    pub const fn label(self) -> &'static str {
99        match self {
100            Self::TokenInvalid => "token_invalid",
101            Self::NonceReplayed => "nonce_replayed",
102            Self::Timeout => "timeout",
103            Self::Blocked => "blocked",
104            Self::Captcha => "captcha",
105            Self::Other => "other",
106        }
107    }
108
109    /// Severity weight contributed to the aggregate score
110    /// (higher = worse). The weights are bounded in
111    /// `[0.0, 1.0]` so the failure-severity term in the
112    /// scorer remains a unit-interval value.
113    #[must_use]
114    pub const fn severity_weight(self) -> f64 {
115        match self {
116            Self::TokenInvalid => 0.50,
117            Self::NonceReplayed => 0.30,
118            Self::Timeout => 0.70,
119            Self::Blocked => 0.60,
120            Self::Captcha => 0.80,
121            Self::Other => 0.40,
122        }
123    }
124}
125
/// One raw observation row used to build a
/// [`PowCapabilityProfile`].
///
/// A sample is the **single-attempt** view: did the solve
/// succeed, how long did it take, how many retries were
/// needed, and (if it failed) which mode terminated the
/// attempt. The store aggregates samples into a profile.
///
/// The two optional fields are omitted from the serialised
/// form when they are `None` and default to `None` when absent
/// on input.
///
/// # Example
///
/// ```
/// use stygian_charon::pow_profile::{PowCapabilitySample, PowFailureMode};
///
/// let solved = PowCapabilitySample::solved(1_500, 0);
/// assert!(solved.solved);
/// assert_eq!(solved.latency_ms, Some(1_500));
///
/// let failed = PowCapabilitySample::failed(2_000, 1, PowFailureMode::Timeout);
/// assert!(!failed.solved);
/// assert_eq!(failed.failure_mode, Some(PowFailureMode::Timeout));
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PowCapabilitySample {
    /// `true` if the challenge was solved; `false` if it
    /// terminated in a failure mode.
    pub solved: bool,
    /// Latency of the attempt in milliseconds. `None` for failed
    /// samples that never produced a measurable solve time; note
    /// that the `failed` constructor always records a latency, so
    /// `None` only arises from direct construction or
    /// deserialisation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub latency_ms: Option<u64>,
    /// Number of retries the attempt consumed before reaching
    /// its terminal state.
    pub retries: u32,
    /// Failure mode for unsuccessful attempts; `None` for
    /// solved samples.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub failure_mode: Option<PowFailureMode>,
}
164
165impl PowCapabilitySample {
166    /// Build a solved sample.
167    #[must_use]
168    pub const fn solved(latency_ms: u64, retries: u32) -> Self {
169        Self {
170            solved: true,
171            latency_ms: Some(latency_ms),
172            retries,
173            failure_mode: None,
174        }
175    }
176
177    /// Build a failed sample.
178    #[must_use]
179    pub const fn failed(latency_ms: u64, retries: u32, mode: PowFailureMode) -> Self {
180        Self {
181            solved: false,
182            latency_ms: Some(latency_ms),
183            retries,
184            failure_mode: Some(mode),
185        }
186    }
187}
188
/// Aggregated `PoW` capability profile for one
/// `(domain, target_class, vendor_family)` triple.
///
/// A profile is built by merging one or more
/// [`PowCapabilitySample`]s through
/// [`PowCapabilityProfile::merge`] and then consumed by
/// [`PowCapabilityScorer`][crate::pow_profile::PowCapabilityScorer]
/// to produce a deterministic score. The profile is the
/// unit of persistence: the store keys profiles by
/// `(domain, target_class, vendor_family)` and re-uses the
/// LRU+TTL primitive from T83's `LruTtlStore`
/// (the same primitive that backs `ChallengeMemory`).
///
/// # Example
///
/// ```
/// use stygian_charon::pow_profile::{PowCapabilityProfile, PowCapabilitySample};
/// use stygian_charon::types::TargetClass;
/// use stygian_charon::vendor_classifier::VendorId;
///
/// let mut profile = PowCapabilityProfile::new(
///     "example.com",
///     TargetClass::ContentSite,
///     VendorId::Cloudflare,
/// );
/// profile.merge(&PowCapabilitySample::solved(1_000, 0));
/// profile.merge(&PowCapabilitySample::solved(1_500, 1));
/// assert_eq!(profile.solved_count, 2);
/// assert_eq!(profile.retry_count, 1);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PowCapabilityProfile {
    /// Lower-cased host the profile was observed for.
    pub domain: String,
    /// Target class the profile was observed for.
    pub target_class: TargetClass,
    /// Vendor family the profile was observed for.
    pub vendor_family: VendorId,
    /// Number of solved challenges inside the window.
    pub solved_count: u32,
    /// Number of failed challenges inside the window.
    pub failed_count: u32,
    /// Cumulative retry count across all attempts inside the
    /// window.
    pub retry_count: u32,
    /// p50 solve latency in milliseconds for **solved**
    /// samples (median). `None` until a latency observation
    /// has been merged. Maintained as a running estimate, not
    /// an exact percentile over stored samples.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub solve_latency_ms_p50: Option<u64>,
    /// p95 solve latency in milliseconds for **solved**
    /// samples. `None` until a latency observation has been
    /// merged (see [`PowCapabilityProfile::merge`]); it is set
    /// together with the p50 estimate and is likewise a running
    /// estimate.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub solve_latency_ms_p95: Option<u64>,
    /// Failure-mode histogram (mode -> count). Defaults to an
    /// empty map when absent from the serialised form.
    #[serde(default)]
    pub failure_modes: BTreeMap<PowFailureMode, u32>,
    /// Width of the sampling window in seconds.
    pub observation_window_secs: u64,
    /// Unix epoch seconds when the profile was last updated.
    pub recorded_at_unix_secs: u64,
}
251
252impl PowCapabilityProfile {
253    /// Build a fresh, empty profile for the given key.
254    ///
255    /// The `observation_window_secs` is seeded with
256    /// [`DEFAULT_SAMPLE_WINDOW_SECS`] and `recorded_at_unix_secs`
257    /// is seeded with the current wall-clock time (falling
258    /// back to a documented zero when the clock is
259    /// unavailable, so serialisation never fails).
260    #[must_use]
261    pub fn new(domain: &str, target_class: TargetClass, vendor_family: VendorId) -> Self {
262        Self {
263            domain: domain.to_ascii_lowercase(),
264            target_class,
265            vendor_family,
266            solved_count: 0,
267            failed_count: 0,
268            retry_count: 0,
269            solve_latency_ms_p50: None,
270            solve_latency_ms_p95: None,
271            failure_modes: BTreeMap::new(),
272            observation_window_secs: DEFAULT_SAMPLE_WINDOW_SECS,
273            recorded_at_unix_secs: current_unix_secs(),
274        }
275    }
276
277    /// Total number of attempts inside the window (solved +
278    /// failed).
279    #[must_use]
280    pub const fn total_attempts(&self) -> u32 {
281        self.solved_count.saturating_add(self.failed_count)
282    }
283
284    /// Solve success rate in `[0.0, 1.0]`. Returns `0.0` for
285    /// an empty profile.
286    #[must_use]
287    pub fn success_rate(&self) -> f64 {
288        let total = self.total_attempts();
289        if total == 0 {
290            0.0
291        } else {
292            f64::from(self.solved_count) / f64::from(total)
293        }
294    }
295
296    /// Average retries per attempt. Returns `0.0` for an
297    /// empty profile.
298    #[must_use]
299    pub fn average_retries(&self) -> f64 {
300        let total = self.total_attempts();
301        if total == 0 {
302            0.0
303        } else {
304            f64::from(self.retry_count) / f64::from(total)
305        }
306    }
307
308    /// Failure severity in `[0.0, 1.0]` — a weighted
309    /// average of `PowFailureMode::severity_weight` over the
310    /// failure histogram. Returns `0.0` for a profile with
311    /// no failed attempts.
312    #[must_use]
313    pub fn failure_severity(&self) -> f64 {
314        let total_failures: u32 = self.failure_modes.values().copied().sum();
315        if total_failures == 0 {
316            return 0.0;
317        }
318        let weighted: f64 = self
319            .failure_modes
320            .iter()
321            .map(|(mode, count)| mode.severity_weight() * f64::from(*count))
322            .sum();
323        weighted / f64::from(total_failures)
324    }
325
326    /// Merge a [`PowCapabilitySample`] into the profile.
327    ///
328    /// Latency p50/p95 are recomputed from the full set of
329    /// solved samples (in the order they were merged) — the
330    /// profile stores enough information to rebuild the
331    /// samples on demand (solved count + p50/p95), so the
332    /// merge is monotonic. The implementation updates
333    /// `recorded_at_unix_secs` to the current wall-clock time
334    /// so the store's TTL semantics still apply on a
335    /// read-after-merge cycle.
336    pub fn merge(&mut self, sample: &PowCapabilitySample) {
337        if sample.solved {
338            self.solved_count = self.solved_count.saturating_add(1);
339        } else {
340            self.failed_count = self.failed_count.saturating_add(1);
341            if let Some(mode) = sample.failure_mode {
342                let entry = self.failure_modes.entry(mode).or_insert(0);
343                *entry = entry.saturating_add(1);
344            }
345        }
346        self.retry_count = self.retry_count.saturating_add(sample.retries);
347
348        if let Some(latency) = sample.latency_ms {
349            let (new_p50, new_p95) = update_latency_percentiles(
350                self.solved_count,
351                self.solve_latency_ms_p50,
352                self.solve_latency_ms_p95,
353                latency,
354            );
355            self.solve_latency_ms_p50 = new_p50;
356            self.solve_latency_ms_p95 = new_p95;
357        }
358
359        self.recorded_at_unix_secs = current_unix_secs();
360    }
361
362    /// Merge another [`PowCapabilityProfile`] into this one
363    /// (same key assumed; the caller's `domain`,
364    /// `target_class`, and `vendor_family` are preserved
365    /// untouched).
366    ///
367    /// The merged profile keeps the larger of the two
368    /// `observation_window_secs` values so a custom-widow
369    /// profile is not silently shrunk by a merge with a
370    /// default-widow one. `recorded_at_unix_secs` is
371    /// refreshed to the current wall-clock time.
372    pub fn merge_profile(&mut self, other: &Self) {
373        self.solved_count = self.solved_count.saturating_add(other.solved_count);
374        self.failed_count = self.failed_count.saturating_add(other.failed_count);
375        self.retry_count = self.retry_count.saturating_add(other.retry_count);
376        for (mode, count) in &other.failure_modes {
377            let entry = self.failure_modes.entry(*mode).or_insert(0);
378            *entry = entry.saturating_add(*count);
379        }
380        self.observation_window_secs = self
381            .observation_window_secs
382            .max(other.observation_window_secs);
383        self.recorded_at_unix_secs = current_unix_secs();
384    }
385}
386
/// Fold one new latency observation into the running p50/p95
/// estimates and return the updated `(p50, p95)` pair.
///
/// The profile does not keep raw samples, so both values are
/// approximations:
///
/// - **p50** moves to the midpoint of the previous estimate and
///   the new observation.
/// - **p95** is a 95/5 mix of the previous estimate and the new
///   observation, so a single observation shifts it by ~5%.
///
/// With no previous estimate, the new observation seeds it
/// directly. Both returned options are therefore always `Some`.
///
/// `_new_solved_count` is accepted for call-site compatibility
/// but does not influence the result: the original code branched
/// on it with two identical arms.
///
/// The arithmetic is exact (rounded down once, at the end), so
/// blending a value with itself returns that value and a steady
/// latency cannot drift the estimate towards zero.
fn update_latency_percentiles(
    _new_solved_count: u32,
    prev_p50: Option<u64>,
    prev_p95: Option<u64>,
    new_latency_ms: u64,
) -> (Option<u64>, Option<u64>) {
    let p50 = prev_p50.map_or(new_latency_ms, |prev| {
        // Overflow-free floor((prev + new) / 2): the last term
        // restores the unit lost when both halves are truncated.
        (prev / 2) + (new_latency_ms / 2) + (prev & new_latency_ms & 1)
    });
    let p95 = prev_p95.map_or(new_latency_ms, |prev| {
        // Widen to `u128` so the multiplications cannot overflow and
        // the division truncates only once.
        let blended = (u128::from(prev) * 95 + u128::from(new_latency_ms) * 5) / 100;
        // A weighted mean never exceeds the larger input, so this
        // conversion cannot actually fail.
        u64::try_from(blended).unwrap_or(u64::MAX)
    });
    (Some(p50), Some(p95))
}
413
414fn current_unix_secs() -> u64 {
415    SystemTime::now()
416        .duration_since(UNIX_EPOCH)
417        .map_or(ZERO_FALLBACK_UNIX_SECS, |duration| duration.as_secs())
418}
419
// Test code is allowed to panic on failure, so the panic-related
// lints are relaxed for this module only.
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;

    /// Fresh profile for the fixed key used by most tests.
    fn empty_profile() -> PowCapabilityProfile {
        PowCapabilityProfile::new(
            "example.com",
            TargetClass::ContentSite,
            VendorId::Cloudflare,
        )
    }

    /// A new profile carries the key it was built with, zeroed
    /// counters, no latency estimates and the default window.
    #[test]
    fn new_profile_uses_defaults() {
        let profile = empty_profile();
        assert_eq!(profile.domain, "example.com");
        assert_eq!(profile.target_class, TargetClass::ContentSite);
        assert_eq!(profile.vendor_family, VendorId::Cloudflare);
        assert_eq!(profile.solved_count, 0);
        assert_eq!(profile.failed_count, 0);
        assert_eq!(profile.retry_count, 0);
        assert!(profile.solve_latency_ms_p50.is_none());
        assert!(profile.solve_latency_ms_p95.is_none());
        assert!(profile.failure_modes.is_empty());
        assert_eq!(profile.observation_window_secs, DEFAULT_SAMPLE_WINDOW_SECS);
    }

    /// The constructor lower-cases a mixed-case domain.
    #[test]
    fn new_profile_normalises_domain_to_lower_case() {
        let profile =
            PowCapabilityProfile::new("Example.COM", TargetClass::Api, VendorId::Cloudflare);
        assert_eq!(profile.domain, "example.com");
    }

    /// Solved samples bump the solved counter, accumulate retries
    /// and populate both latency estimates without touching the
    /// failure histogram.
    #[test]
    fn merge_increments_solved_count() {
        let mut profile = empty_profile();
        profile.merge(&PowCapabilitySample::solved(1_000, 0));
        profile.merge(&PowCapabilitySample::solved(1_500, 1));
        assert_eq!(profile.solved_count, 2);
        assert_eq!(profile.retry_count, 1);
        assert!(profile.solve_latency_ms_p50.is_some());
        assert!(profile.solve_latency_ms_p95.is_some());
        assert!(profile.failure_modes.is_empty());
    }

    /// Failed samples bump the failed counter, accumulate retries
    /// and are tallied per failure mode.
    #[test]
    fn merge_increments_failed_count_and_failure_histogram() {
        let mut profile = empty_profile();
        profile.merge(&PowCapabilitySample::failed(
            2_000,
            1,
            PowFailureMode::Timeout,
        ));
        profile.merge(&PowCapabilitySample::failed(
            2_500,
            2,
            PowFailureMode::Timeout,
        ));
        profile.merge(&PowCapabilitySample::failed(
            3_000,
            1,
            PowFailureMode::Blocked,
        ));
        assert_eq!(profile.failed_count, 3);
        assert_eq!(profile.retry_count, 4);
        assert_eq!(
            profile.failure_modes.get(&PowFailureMode::Timeout),
            Some(&2)
        );
        assert_eq!(
            profile.failure_modes.get(&PowFailureMode::Blocked),
            Some(&1)
        );
    }

    /// The ratio helpers return zero instead of dividing by zero
    /// on a profile with no attempts.
    #[test]
    fn success_rate_and_average_retries_handle_empty_profile() {
        let profile = empty_profile();
        assert!((profile.success_rate() - 0.0).abs() < 1e-9);
        assert!((profile.average_retries() - 0.0).abs() < 1e-9);
        assert_eq!(profile.total_attempts(), 0);
    }

    /// A profile with only solved samples has zero failure severity.
    #[test]
    fn failure_severity_is_zero_for_clean_profiles() {
        let mut profile = empty_profile();
        profile.merge(&PowCapabilitySample::solved(1_000, 0));
        assert!((profile.failure_severity() - 0.0).abs() < 1e-9);
    }

    /// One failure of each of two modes yields the mean of the
    /// two modes' severity weights.
    #[test]
    fn failure_severity_averages_over_histogram() {
        let mut profile = empty_profile();
        profile.merge(&PowCapabilitySample::failed(
            1_000,
            0,
            PowFailureMode::Captcha,
        ));
        profile.merge(&PowCapabilitySample::failed(
            1_000,
            0,
            PowFailureMode::Timeout,
        ));
        let expected = f64::midpoint(
            PowFailureMode::Captcha.severity_weight(),
            PowFailureMode::Timeout.severity_weight(),
        );
        assert!((profile.failure_severity() - expected).abs() < 1e-9);
    }

    /// Merging two profiles keeps the receiver's key and sums the
    /// counters and per-mode failure counts.
    #[test]
    fn merge_profile_preserves_key_and_combines_counts() {
        let mut a = empty_profile();
        a.merge(&PowCapabilitySample::solved(1_000, 0));
        a.merge(&PowCapabilitySample::failed(
            2_000,
            1,
            PowFailureMode::Timeout,
        ));

        let mut b = empty_profile();
        b.merge(&PowCapabilitySample::solved(1_500, 1));
        b.merge(&PowCapabilitySample::failed(
            2_500,
            0,
            PowFailureMode::Blocked,
        ));

        a.merge_profile(&b);
        assert_eq!(a.domain, "example.com");
        assert_eq!(a.target_class, TargetClass::ContentSite);
        assert_eq!(a.vendor_family, VendorId::Cloudflare);
        assert_eq!(a.solved_count, 2);
        assert_eq!(a.failed_count, 2);
        assert_eq!(a.retry_count, 2);
        assert_eq!(a.failure_modes.get(&PowFailureMode::Timeout), Some(&1));
        assert_eq!(a.failure_modes.get(&PowFailureMode::Blocked), Some(&1));
    }

    /// Merging keeps the wider of the two observation windows.
    #[test]
    fn merge_profile_preserves_larger_window() {
        let mut a = empty_profile();
        a.observation_window_secs = 1_800;
        let mut b = empty_profile();
        b.observation_window_secs = 7_200;
        a.merge_profile(&b);
        assert_eq!(a.observation_window_secs, 7_200);
    }

    /// Pins the wire label of every failure mode.
    #[test]
    fn failure_mode_labels_are_stable() {
        assert_eq!(PowFailureMode::TokenInvalid.label(), "token_invalid");
        assert_eq!(PowFailureMode::NonceReplayed.label(), "nonce_replayed");
        assert_eq!(PowFailureMode::Timeout.label(), "timeout");
        assert_eq!(PowFailureMode::Blocked.label(), "blocked");
        assert_eq!(PowFailureMode::Captcha.label(), "captcha");
        assert_eq!(PowFailureMode::Other.label(), "other");
    }

    /// Every severity weight lies in the unit interval.
    #[test]
    fn failure_mode_severity_weights_are_bounded() {
        for mode in [
            PowFailureMode::TokenInvalid,
            PowFailureMode::NonceReplayed,
            PowFailureMode::Timeout,
            PowFailureMode::Blocked,
            PowFailureMode::Captcha,
            PowFailureMode::Other,
        ] {
            let w = mode.severity_weight();
            assert!((0.0..=1.0).contains(&w), "weight out of range: {w}");
        }
    }

    /// A populated profile survives a JSON serialise/deserialise
    /// cycle unchanged.
    #[test]
    fn profile_round_trips_through_json() {
        let mut profile = empty_profile();
        profile.merge(&PowCapabilitySample::solved(1_000, 0));
        profile.merge(&PowCapabilitySample::failed(
            2_000,
            1,
            PowFailureMode::Timeout,
        ));
        let json = serde_json::to_string(&profile).expect("serialize");
        let back: PowCapabilityProfile = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(back, profile);
    }

    /// Both solved and failed samples survive a JSON
    /// serialise/deserialise cycle unchanged.
    #[test]
    fn sample_round_trips_through_json() {
        let solved = PowCapabilitySample::solved(1_500, 2);
        let json = serde_json::to_string(&solved).expect("serialize");
        let back: PowCapabilitySample = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(back, solved);

        let failed = PowCapabilitySample::failed(2_000, 1, PowFailureMode::Captcha);
        let json = serde_json::to_string(&failed).expect("serialize");
        let back: PowCapabilitySample = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(back, failed);
    }
}
627}