stygian_charon/pow_profile/
scorer.rs

1//! Deterministic `PoW` capability scorer (T93).
2//!
3//! The [`PowCapabilityScorer`] consumes a
4//! [`PowCapabilityProfile`][crate::pow_profile::PowCapabilityProfile]
5//! and produces a unit-interval score plus a coarse-grained
6//! [`PowCapabilityBand`] label. Scoring is fully deterministic:
7//! the same profile always produces the same score.
8//!
9//! ## Score formula
10//!
11//! ```text
12//! score = w_success * success_rate
13//!       + w_latency * latency_score
14//!       + w_retry   * retry_score
15//!       + w_failure * (1.0 - failure_severity)
16//! ```
17//!
18//! where:
19//! - `success_rate` is `solved_count / total_attempts`.
20//! - `latency_score` is `1.0 - clamp(p95 / latency_budget_ms, 0.0, 1.0)`.
21//! - `retry_score` is `1.0 - clamp(avg_retries / retry_budget, 0.0, 1.0)`.
22//! - `failure_severity` is the weighted average of
23//!   [`PowFailureMode::severity_weight`][crate::pow_profile::PowFailureMode::severity_weight]
24//!   over the failure histogram.
25//!
//! The default weights sum to `1.0`. Custom weights supplied through
//! [`PowCapabilityScorer::with_weights`] do not have to: the blended
//! value is divided by the weight sum and then clamped, so the output
//! is always in `[0.0, 1.0]`.
29//!
30//! ## Sparse telemetry fallback
31//!
//! When the profile's `total_attempts` is below the scorer's
//! minimum-observation floor ([`MIN_OBSERVATIONS_FOR_SCORING`] by
//! default) the scorer returns [`SPARSE_FALLBACK_SCORE`] (a
//! documented `0.5`). The fallback is the **same** value
//! returned for the empty profile, so callers do not have to
//! branch on "is this sparse" — they get a single number
//! that means "no signal, default to neutral".
39//!
40//! ## Sampling window defaults
41//!
42//! The scorer does **not** adjust for `observation_window_secs`
43//! directly — the score is a function of the **content** of
44//! the profile, not its age. The store's TTL is the mechanism
45//! that keeps a stale profile from mis-routing the runner
46//! (an expired profile simply does not look up).
47
48use serde::{Deserialize, Serialize};
49
50use crate::pow_profile::profile::PowCapabilityProfile;
51
/// Default minimum-observation floor used by
/// [`PowCapabilityScorer`].
///
/// A [`PowCapabilityProfile`] whose attempt count is below the
/// scorer's floor is not scored; the sparse-telemetry fallback
/// is reported instead.
///
/// Three is a deliberately low bar. A single solved/failed pair
/// is too noisy to trust (one transient block moves the success
/// rate from 100% to 50%), yet the runner can seldom afford to
/// wait for a high-confidence sample while the target is
/// actively challenging it. Callers that need more evidence can
/// raise the floor with
/// [`PowCapabilityScorer::with_min_observations`].
pub const MIN_OBSERVATIONS_FOR_SCORING: u32 = 3;
63
/// Neutral score reported when a profile cannot be scored.
///
/// [`PowCapabilityScorer::score`] returns this value when the
/// profile has fewer attempts than the scorer's
/// minimum-observation floor ([`MIN_OBSERVATIONS_FOR_SCORING`]
/// by default), and also when the configured weights sum to
/// zero or less.
///
/// `0.5` is the midpoint of the unit interval: it says "no
/// signal" rather than "good" or "bad". Downstream policy
/// mapping treats it as the no-op baseline, so a target that
/// has not been observed yet is not over-escalated.
pub const SPARSE_FALLBACK_SCORE: f64 = 0.5;
73
/// Default budget, in milliseconds, for the latency-score term.
///
/// The latency term falls linearly from `1.0` at a p95 of zero
/// to `0.0` once the p95 solve latency reaches the budget;
/// anything slower stays fully penalised at `0.0`.
///
/// Five seconds is a conservative ceiling — most well-behaved
/// vendor `PoW` challenges solve well under that.
pub const DEFAULT_LATENCY_BUDGET_MS: u64 = 5_000;
81
/// Default budget for the retry-score term.
///
/// The retry term falls linearly from `1.0` at zero average
/// retries to `0.0` once the average reaches the budget; a
/// profile that retries more than that stays fully penalised.
pub const DEFAULT_RETRY_BUDGET: f64 = 3.0;
87
88/// Configurable weights for the four scoring terms.
89///
90/// Defaults sum to `1.0` so the output is in `[0.0, 1.0]`
91/// when all weights are non-negative. Custom weights are
92/// not required to sum to `1.0` — the scorer re-normalises
93/// by dividing by the weight sum, so callers can experiment
94/// with relative emphasis without losing the unit-interval
95/// property.
96#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
97pub struct ProfileWeights {
98    /// Weight applied to the success-rate term.
99    pub success: f64,
100    /// Weight applied to the latency-score term.
101    pub latency: f64,
102    /// Weight applied to the retry-score term.
103    pub retry: f64,
104    /// Weight applied to the `(1 - failure_severity)` term.
105    pub failure: f64,
106}
107
108impl Default for ProfileWeights {
109    fn default() -> Self {
110        Self {
111            success: 0.40,
112            latency: 0.20,
113            retry: 0.10,
114            failure: 0.30,
115        }
116    }
117}
118
119/// Coarse-grained capability band derived from a unit-interval
120/// score.
121///
122/// The bands are the **policy** surface — the policy mapper
123/// in `crate::pow_profile::policy` consumes a band and
124/// returns deterministic escalation / pacing adjustments.
125/// Callers that want a continuous score use
126/// [`PowCapabilityScorer::score`] directly.
127#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
128#[serde(rename_all = "snake_case")]
129pub enum PowCapabilityBand {
130    /// The profile shows consistent, fast, low-retry solves.
131    Strong,
132    /// The profile shows acceptable but not impressive
133    /// results.
134    Degraded,
135    /// The profile shows slow solves, high retries, or many
136    /// failures.
137    Weak,
138    /// The profile has too few samples to score; the
139    /// documented default is returned instead.
140    Unknown,
141}
142
143impl PowCapabilityBand {
144    /// Stable lower-case wire label.
145    #[must_use]
146    pub const fn label(self) -> &'static str {
147        match self {
148            Self::Strong => "strong",
149            Self::Degraded => "degraded",
150            Self::Weak => "weak",
151            Self::Unknown => "unknown",
152        }
153    }
154}
155
156/// Configurable deterministic scorer for a
157/// [`PowCapabilityProfile`].
158///
159/// The scorer is `Copy` so it can live in a static
160/// configuration struct without a wrapper. The default
161/// configuration ([`PowCapabilityScorer::default`]) is the
162/// recommended starting point — every field is documented
163/// and has a public constant.
164#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
165pub struct PowCapabilityScorer {
166    weights: ProfileWeights,
167    min_observations: u32,
168    latency_budget_ms: u64,
169    retry_budget: f64,
170}
171
172impl Default for PowCapabilityScorer {
173    fn default() -> Self {
174        Self {
175            weights: ProfileWeights::default(),
176            min_observations: MIN_OBSERVATIONS_FOR_SCORING,
177            latency_budget_ms: DEFAULT_LATENCY_BUDGET_MS,
178            retry_budget: DEFAULT_RETRY_BUDGET,
179        }
180    }
181}
182
183impl PowCapabilityScorer {
184    /// Build a scorer with the default configuration.
185    #[must_use]
186    pub fn new() -> Self {
187        Self::default()
188    }
189
190    /// Replace the scoring weights.
191    #[must_use]
192    pub const fn with_weights(mut self, weights: ProfileWeights) -> Self {
193        self.weights = weights;
194        self
195    }
196
197    /// Replace the minimum-observation floor.
198    /// A value of `0` effectively disables sparse-telemetry
199    /// fallback (the scorer will return a score for any
200    /// non-empty profile). Negative values are clamped to
201    /// `0` so the public surface stays simple.
202    #[must_use]
203    pub const fn with_min_observations(mut self, min_observations: u32) -> Self {
204        self.min_observations = min_observations;
205        self
206    }
207
208    /// Replace the latency budget (milliseconds) used to
209    /// score the p95 latency term. A value of `0` falls
210    /// back to the documented default so the latency term
211    /// never silently becomes "always penalised".
212    #[must_use]
213    pub const fn with_latency_budget_ms(mut self, latency_budget_ms: u64) -> Self {
214        if latency_budget_ms == 0 {
215            self.latency_budget_ms = DEFAULT_LATENCY_BUDGET_MS;
216        } else {
217            self.latency_budget_ms = latency_budget_ms;
218        }
219        self
220    }
221
222    /// Replace the retry budget used to score the average
223    /// retries term. A non-positive value falls back to
224    /// the documented default.
225    #[must_use]
226    pub fn with_retry_budget(mut self, retry_budget: f64) -> Self {
227        if retry_budget <= 0.0 {
228            self.retry_budget = DEFAULT_RETRY_BUDGET;
229        } else {
230            self.retry_budget = retry_budget;
231        }
232        self
233    }
234
235    /// Current weights.
236    #[must_use]
237    pub const fn weights(&self) -> ProfileWeights {
238        self.weights
239    }
240
241    /// Current minimum-observation floor.
242    #[must_use]
243    pub const fn min_observations(&self) -> u32 {
244        self.min_observations
245    }
246
247    /// Current latency budget in milliseconds.
248    #[must_use]
249    pub const fn latency_budget_ms(&self) -> u64 {
250        self.latency_budget_ms
251    }
252
253    /// Current retry budget.
254    #[must_use]
255    pub const fn retry_budget(&self) -> f64 {
256        self.retry_budget
257    }
258
259    /// Score a [`PowCapabilityProfile`].
260    ///
261    /// Returns [`SPARSE_FALLBACK_SCORE`] when the profile has
262    /// fewer attempts than
263    /// [`PowCapabilityScorer::min_observations`]
264    /// (the documented "no signal" default). Otherwise the
265    /// four scoring terms are blended through the configured
266    /// weights and re-normalised so the result is in
267    /// `[0.0, 1.0]` even when the weights do not sum to
268    /// `1.0`.
269    #[must_use]
270    pub fn score(&self, profile: &PowCapabilityProfile) -> f64 {
271        if profile.total_attempts() < self.min_observations {
272            return SPARSE_FALLBACK_SCORE;
273        }
274
275        let success_rate = profile.success_rate();
276        let latency_score = self.latency_score(profile);
277        let retry_score = self.retry_score(profile);
278        let failure_score = 1.0 - profile.failure_severity();
279
280        let weight_sum =
281            self.weights.success + self.weights.latency + self.weights.retry + self.weights.failure;
282        if weight_sum <= 0.0 {
283            return SPARSE_FALLBACK_SCORE;
284        }
285
286        let raw = self.weights.failure.mul_add(
287            failure_score,
288            self.weights.retry.mul_add(
289                retry_score,
290                self.weights
291                    .latency
292                    .mul_add(latency_score, self.weights.success * success_rate),
293            ),
294        );
295        let normalised = raw / weight_sum;
296        clamp_unit(normalised)
297    }
298
299    /// Score a profile and return a coarse
300    /// [`PowCapabilityBand`].
301    ///
302    /// The band thresholds are fixed and documented
303    /// (`strong` ≥ `0.75`, `degraded` ≥ `0.40`, `weak`
304    /// otherwise). Profiles that do not meet the
305    /// minimum-observation floor return
306    /// [`PowCapabilityBand::Unknown`].
307    #[must_use]
308    pub fn band(&self, profile: &PowCapabilityProfile) -> PowCapabilityBand {
309        if profile.total_attempts() < self.min_observations {
310            return PowCapabilityBand::Unknown;
311        }
312        let value = self.score(profile);
313        band_for_score(value)
314    }
315
316    fn latency_score(&self, profile: &PowCapabilityProfile) -> f64 {
317        profile.solve_latency_ms_p95.map_or(1.0, |p95| {
318            // Latency values are well within f64 mantissa
319            // precision (5_000ms × 100 < 2^23); the `as`
320            // conversion is intentional and bounded by
321            // the configured latency budget.
322            #[allow(clippy::cast_precision_loss)]
323            let budget = self.latency_budget_ms as f64;
324            #[allow(clippy::cast_precision_loss)]
325            let ratio = ((p95 as f64) / budget).clamp(0.0, 1.0);
326            1.0 - ratio
327        })
328    }
329
330    fn retry_score(&self, profile: &PowCapabilityProfile) -> f64 {
331        let avg = profile.average_retries();
332        let ratio = (avg / self.retry_budget).clamp(0.0, 1.0);
333        1.0 - ratio
334    }
335}
336
337/// Map a unit-interval score to a [`PowCapabilityBand`].
338///
339/// Exposed publicly so the policy mapper can reuse the
340/// thresholds without depending on the scorer.
341#[must_use]
342pub fn band_for_score(score: f64) -> PowCapabilityBand {
343    if score >= 0.75 {
344        PowCapabilityBand::Strong
345    } else if score >= 0.40 {
346        PowCapabilityBand::Degraded
347    } else {
348        PowCapabilityBand::Weak
349    }
350}
351
/// Force `value` onto `[0.0, 1.0]`, mapping NaN to `0.0`.
///
/// `f64::clamp` passes NaN through unchanged, so it is handled
/// up front to guarantee the scorer never reports NaN.
const fn clamp_unit(value: f64) -> f64 {
    if value.is_nan() {
        return 0.0;
    }
    value.clamp(0.0, 1.0)
}
359
#[cfg(test)]
// Tests signal failure by panicking, so the panic-related lints are
// relaxed here (presumably they are enforced for production code
// elsewhere in the crate).
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;
    use crate::pow_profile::profile::{PowCapabilityProfile, PowCapabilitySample, PowFailureMode};
    use crate::types::TargetClass;
    use crate::vendor_classifier::VendorId;

    /// Float equality with a tolerance tight enough for the
    /// constants and short sums used in these tests.
    fn approx_eq(a: f64, b: f64) -> bool {
        (a - b).abs() < 1e-9
    }

    /// Profile with no recorded attempts.
    fn empty_profile() -> PowCapabilityProfile {
        PowCapabilityProfile::new(
            "example.com",
            TargetClass::ContentSite,
            VendorId::Cloudflare,
        )
    }

    /// Profile holding `count` identical one-second, retry-free
    /// solves.
    fn solved_profile(count: u32) -> PowCapabilityProfile {
        let mut profile = empty_profile();
        for _ in 0..count {
            profile.merge(&PowCapabilitySample::solved(1_000, 0));
        }
        profile
    }

    #[test]
    fn empty_profile_returns_sparse_fallback() {
        let scorer = PowCapabilityScorer::new();
        let profile = empty_profile();
        assert!(approx_eq(scorer.score(&profile), SPARSE_FALLBACK_SCORE));
        assert_eq!(scorer.band(&profile), PowCapabilityBand::Unknown);
    }

    #[test]
    fn sparse_profile_returns_sparse_fallback() {
        // Two attempts is below the documented minimum
        // (MIN_OBSERVATIONS_FOR_SCORING = 3).
        let mut profile = empty_profile();
        profile.merge(&PowCapabilitySample::solved(1_000, 0));
        profile.merge(&PowCapabilitySample::solved(1_500, 0));
        assert_eq!(profile.total_attempts(), 2);

        let scorer = PowCapabilityScorer::new();
        assert!(approx_eq(scorer.score(&profile), SPARSE_FALLBACK_SCORE));
        assert_eq!(scorer.band(&profile), PowCapabilityBand::Unknown);
    }

    #[test]
    fn good_telemetry_scores_strong() {
        // 9 solved, 1 failed, fast p95, low retries, only
        // one failure mode (TokenInvalid — moderate weight).
        let mut profile = empty_profile();
        for _ in 0..9 {
            profile.merge(&PowCapabilitySample::solved(800, 0));
        }
        profile.merge(&PowCapabilitySample::failed(
            1_000,
            1,
            PowFailureMode::TokenInvalid,
        ));
        assert_eq!(profile.total_attempts(), 10);

        let scorer = PowCapabilityScorer::new();
        let score = scorer.score(&profile);
        assert!(
            score > 0.75,
            "good telemetry should score Strong, got {score}"
        );
        assert_eq!(scorer.band(&profile), PowCapabilityBand::Strong);
    }

    #[test]
    fn poor_telemetry_scores_weak() {
        // 2 solved, 8 failed, slow p95, many retries, mix
        // of high-severity failure modes.
        let mut profile = empty_profile();
        profile.merge(&PowCapabilitySample::solved(4_000, 2));
        profile.merge(&PowCapabilitySample::solved(4_500, 3));
        for mode in [PowFailureMode::Captcha, PowFailureMode::Blocked] {
            for _ in 0..4 {
                profile.merge(&PowCapabilitySample::failed(5_000, 3, mode));
            }
        }
        assert_eq!(profile.total_attempts(), 10);

        let scorer = PowCapabilityScorer::new();
        let score = scorer.score(&profile);
        assert!(
            score < 0.40,
            "poor telemetry should score Weak, got {score}"
        );
        assert_eq!(scorer.band(&profile), PowCapabilityBand::Weak);
    }

    #[test]
    fn deterministic_for_same_input() {
        let build = || {
            let mut profile = empty_profile();
            for _ in 0..5 {
                profile.merge(&PowCapabilitySample::solved(1_000, 0));
            }
            profile.merge(&PowCapabilitySample::failed(
                2_000,
                1,
                PowFailureMode::Timeout,
            ));
            profile
        };
        let a = build();
        let b = build();

        let scorer = PowCapabilityScorer::new();
        assert!(approx_eq(scorer.score(&a), scorer.score(&b)));
        assert_eq!(scorer.band(&a), scorer.band(&b));
    }

    #[test]
    fn weight_sum_normalisation_handles_non_unit_weights() {
        // Weights that don't sum to 1.0 should still
        // produce a value in [0.0, 1.0].
        let profile = solved_profile(3);
        let scorer = PowCapabilityScorer::new().with_weights(ProfileWeights {
            success: 2.0,
            latency: 1.0,
            retry: 0.5,
            failure: 1.0,
        });
        let score = scorer.score(&profile);
        assert!((0.0..=1.0).contains(&score), "score out of range: {score}");
    }

    #[test]
    fn zero_weight_sum_falls_back_to_sparse_default() {
        let profile = solved_profile(3);
        let scorer = PowCapabilityScorer::new().with_weights(ProfileWeights {
            success: 0.0,
            latency: 0.0,
            retry: 0.0,
            failure: 0.0,
        });
        let score = scorer.score(&profile);
        assert!(!score.is_nan());
        assert!(approx_eq(score, SPARSE_FALLBACK_SCORE));
    }

    #[test]
    fn min_observations_override_is_respected() {
        let profile = solved_profile(1);
        // Default min is 3 — single attempt is sparse.
        assert!(approx_eq(
            PowCapabilityScorer::new().score(&profile),
            SPARSE_FALLBACK_SCORE
        ));
        // Override to 1 attempt — same profile now scores.
        let scorer = PowCapabilityScorer::new().with_min_observations(1);
        let score = scorer.score(&profile);
        assert!((0.0..=1.0).contains(&score));
    }

    #[test]
    fn zero_latency_budget_falls_back_to_default() {
        let scorer = PowCapabilityScorer::new().with_latency_budget_ms(0);
        assert_eq!(scorer.latency_budget_ms(), DEFAULT_LATENCY_BUDGET_MS);
    }

    #[test]
    fn zero_retry_budget_falls_back_to_default() {
        let scorer = PowCapabilityScorer::new().with_retry_budget(0.0);
        assert!(approx_eq(scorer.retry_budget(), DEFAULT_RETRY_BUDGET));
    }

    #[test]
    fn negative_retry_budget_falls_back_to_default() {
        let scorer = PowCapabilityScorer::new().with_retry_budget(-1.5);
        assert!(approx_eq(scorer.retry_budget(), DEFAULT_RETRY_BUDGET));
    }

    #[test]
    fn band_thresholds_are_stable() {
        assert_eq!(band_for_score(0.75), PowCapabilityBand::Strong);
        assert_eq!(band_for_score(1.0), PowCapabilityBand::Strong);
        assert_eq!(band_for_score(0.40), PowCapabilityBand::Degraded);
        assert_eq!(band_for_score(0.74), PowCapabilityBand::Degraded);
        assert_eq!(band_for_score(0.39), PowCapabilityBand::Weak);
        assert_eq!(band_for_score(0.0), PowCapabilityBand::Weak);
    }

    #[test]
    fn band_labels_are_stable() {
        assert_eq!(PowCapabilityBand::Strong.label(), "strong");
        assert_eq!(PowCapabilityBand::Degraded.label(), "degraded");
        assert_eq!(PowCapabilityBand::Weak.label(), "weak");
        assert_eq!(PowCapabilityBand::Unknown.label(), "unknown");
    }

    #[test]
    fn nan_score_clamped_to_zero() {
        // The clamp helper maps NaN to the bottom of the range.
        assert!(approx_eq(clamp_unit(f64::NAN), 0.0));

        // A NaN weight poisons the weight sum. Whatever the
        // scorer reports for it must still be a real number on
        // the unit interval — never NaN.
        let profile = solved_profile(3);
        let scorer = PowCapabilityScorer::new().with_weights(ProfileWeights {
            success: f64::NAN,
            ..ProfileWeights::default()
        });
        let score = scorer.score(&profile);
        assert!(!score.is_nan());
        assert!((0.0..=1.0).contains(&score), "score out of range: {score}");
    }
}
stygian_charon/pow_profile/scorer.rs

stygian_charon/pow_profile/
scorer.rs