Skip to main content

stygian_charon/vendor_resolver/
rules.rs

//! Resolution rule schema (T90).
//!
//! A [`ResolutionRule`] is the **codified policy mapping** from a
//! set of detected anti-bot vendors to a target playbook. Each
//! rule carries:
//!
//! - the **playbook id** it resolves to (`tier2-hostile`,
//!   `tier1-js`, `tier1-static`, or the sentinel empty string for
//!   the `Manual` strategy marker),
//! - the **target class** the playbook maps to,
//! - a **priority** (lower value wins) so multi-vendor and
//!   conflicting rule scenarios resolve deterministically,
//! - the **vendor list** that triggers the rule, with per-vendor
//!   weights so the [`MergeStrategy`] can decide what to do when
//!   more than one listed vendor matched,
//! - the **confidence/score gates** (`min_confidence`,
//!   `min_score`) the [`crate::vendor_classifier::VendorClassification`]
//!   must cross for the rule to fire, and
//! - the **merge strategy** the resolver applies when the rule
//!   fires alongside one or more other rules (see the table in
//!   [`crate::vendor_resolver`]).
//!
//! ## Multi-vendor merge strategies
//!
//! | `MergeStrategy`   | Behaviour                                                                            |
//! |-------------------|--------------------------------------------------------------------------------------|
//! | `StrongestVendor` | Pick the highest-weight vendor in the rule and resolve with its playbook.            |
//! | `Single`          | Pick the single matched vendor (lowest `VendorId` discriminant on ties) and resolve. |
//! | `Manual`          | Defer to manual mode — return the `StrategyMarker::Manual` marker.                   |
//!
//! The strategies are documented in the module rustdoc and shipped
//! as data in `crates/stygian-charon/data/vendor_playbook_rules/`.
33
34use std::collections::BTreeMap;
35
36use serde::{Deserialize, Serialize};
37
38use crate::types::TargetClass;
39use crate::vendor_classifier::VendorId;
40use crate::vendor_resolver::error::VendorResolverError;
41
42/// How the resolver should combine multiple matched vendors into
43/// a single playbook decision.
44///
45/// See the module-level table for the documented behaviour of each
46/// variant.
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
48#[serde(rename_all = "snake_case")]
49pub enum MergeStrategy {
50    /// Pick the highest-weight vendor in the rule and resolve with
51    /// its playbook.
52    StrongestVendor,
53    /// Pick the single matched vendor (lowest `VendorId`
54    /// discriminant on ties) and resolve with its playbook.
55    Single,
56    /// Defer to manual mode — return the
57    /// [`StrategyMarker::Manual`][crate::vendor_resolver::StrategyMarker::Manual]
58    /// marker so the caller preserves its existing manual mode
59    /// selection.
60    Manual,
61}
62
63impl MergeStrategy {
64    /// Stable lower-case wire label.
65    #[must_use]
66    pub const fn label(self) -> &'static str {
67        match self {
68            Self::StrongestVendor => "strongest_vendor",
69            Self::Single => "single",
70            Self::Manual => "manual",
71        }
72    }
73}
74
75/// One vendor entry inside a [`ResolutionRule::vendors`] list.
76///
77/// Each entry pairs a [`VendorId`] with a **rule-weight** (not to
78/// be confused with the classifier's signal weight). The rule-weight
79/// tells the [`MergeStrategy::StrongestVendor`] logic which vendor
80/// dominates when several listed vendors match simultaneously.
81#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
82pub struct VendorRuleMatch {
83    /// Vendor that triggers the rule. The TOML wire format uses
84    /// the [`label`][VendorId::label] (e.g. `"datadome"`,
85    /// `"perimeter_x"`) so the format matches the existing vendor
86    /// classifier TOML definitions rather than the serde
87    /// `snake_case` rename of the enum.
88    #[serde(deserialize_with = "deserialize_vendor_id_from_label")]
89    pub vendor: VendorId,
90    /// Per-rule weight used by
91    /// [`MergeStrategy::StrongestVendor`] when multiple listed
92    /// vendors match. Higher wins.
93    pub weight: u32,
94}
95
96fn deserialize_vendor_id_from_label<'de, D>(deserializer: D) -> Result<VendorId, D::Error>
97where
98    D: serde::Deserializer<'de>,
99{
100    use serde::Deserialize;
101    let label = String::deserialize(deserializer)?;
102    VendorId::from_label(&label)
103        .ok_or_else(|| serde::de::Error::custom(format!("unknown vendor label '{label}'")))
104}
105
106/// Single codified rule mapping vendor patterns to a playbook.
107///
108/// Rules are **ordered by priority** (lower numbers win). When two
109/// rules both match a [`crate::vendor_classifier::VendorClassification`]
110/// the resolver picks the lowest-priority rule, then applies its
111/// [`merge_strategy`][Self::merge_strategy] to combine any
112/// remaining rules into a single decision.
113///
114/// # Example
115///
116/// ```
117/// use stygian_charon::vendor_resolver::{MergeStrategy, ResolutionRule, VendorRuleMatch};
118/// use stygian_charon::types::TargetClass;
119/// use stygian_charon::vendor_classifier::VendorId;
120///
121/// let rule = ResolutionRule {
122///     id: "tier2-hostile".to_string(),
123///     playbook_id: "tier2-hostile".to_string(),
124///     target_class: TargetClass::HighSecurity,
125///     priority: 0,
126///     merge_strategy: MergeStrategy::StrongestVendor,
127///     description: "Hostile anti-bot vendors".to_string(),
128///     min_confidence: 0.60,
129///     min_score: 5,
130///     require_unknown_vendor: false,
131///     vendors: vec![VendorRuleMatch {
132///         vendor: VendorId::DataDome,
133///         weight: 10,
134///     }],
135/// };
136/// assert!(rule.validate().is_ok());
137/// ```
138#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
139pub struct ResolutionRule {
140    /// Stable rule id (`"tier2-hostile"`, `"tier1-js-cloudflare"`,
141    /// etc.). Required, non-empty, unique within a resolver bundle.
142    pub id: String,
143    /// Playbook id the rule resolves to. Empty string means the
144    /// `Manual` strategy marker should be returned instead.
145    pub playbook_id: String,
146    /// Target class the resolved playbook maps to.
147    pub target_class: TargetClass,
148    /// Priority (lower wins). The baseline rules use `0`
149    /// (tier2-hostile), `10` (tier1-js-cloudflare), `100`
150    /// (tier1-static), and `1000` (default-manual).
151    pub priority: u32,
152    /// Merge strategy used when this rule fires alongside other
153    /// matching rules.
154    pub merge_strategy: MergeStrategy,
155    /// Human-readable description for operator logs.
156    #[serde(default)]
157    pub description: String,
158    /// Minimum classifier confidence the top vendor must cross for
159    /// the rule to fire. Must be in `[0.0, 1.0]`.
160    #[serde(default = "default_min_confidence")]
161    pub min_confidence: f64,
162    /// Minimum classifier score the top vendor must reach for the
163    /// rule to fire. Must be `> 0` (or `0` for the catch-all
164    /// `default-manual` rule).
165    #[serde(default)]
166    pub min_score: u32,
167    /// When `true`, the rule only fires when the classifier reports
168    /// [`VendorId::Unknown`]. Used by the `tier1-static` rule so
169    /// benign unknown classifications do not accidentally swallow
170    /// single-signal low-confidence matches.
171    #[serde(default)]
172    pub require_unknown_vendor: bool,
173    /// Vendors that trigger the rule.
174    #[serde(default)]
175    pub vendors: Vec<VendorRuleMatch>,
176}
177
/// Serde default for [`ResolutionRule::min_confidence`]: no
/// confidence gate (`0.0`).
const fn default_min_confidence() -> f64 {
    0.0
}
181
182impl ResolutionRule {
183    /// Validate the rule's internal consistency. Reports the first
184    /// failing field with a structured error that includes both the
185    /// rule id and the field path.
186    ///
187    /// # Errors
188    ///
189    /// Returns [`VendorResolverError`] on the first inconsistency.
190    /// The error embeds the **rule id**, the **field path**, and
191    /// the **bad value** so operators can locate the offending
192    /// TOML line without re-running the loader.
193    pub fn validate(&self) -> Result<(), VendorResolverError> {
194        if self.id.trim().is_empty() {
195            return Err(VendorResolverError::invalid_rule(
196                self.id.clone(),
197                "id",
198                self.id.clone(),
199                "rule id must be a non-empty string",
200            ));
201        }
202        if !(0.0..=1.0).contains(&self.min_confidence) {
203            return Err(VendorResolverError::invalid_rule(
204                self.id.clone(),
205                "min_confidence",
206                self.min_confidence,
207                "min_confidence must be in [0.0, 1.0]",
208            ));
209        }
210        if self.min_confidence > 0.0 && self.vendors.is_empty() {
211            return Err(VendorResolverError::invalid_rule(
212                self.id.clone(),
213                "vendors",
214                "<empty>",
215                "vendors list must be non-empty when min_confidence > 0.0",
216            ));
217        }
218        if self.merge_strategy == MergeStrategy::Manual && !self.playbook_id.is_empty() {
219            return Err(VendorResolverError::invalid_rule(
220                self.id.clone(),
221                "playbook_id",
222                self.playbook_id.clone(),
223                "playbook_id must be empty when merge_strategy = manual",
224            ));
225        }
226        if self.merge_strategy != MergeStrategy::Manual && self.playbook_id.is_empty() {
227            return Err(VendorResolverError::invalid_rule(
228                self.id.clone(),
229                "playbook_id",
230                self.playbook_id.clone(),
231                "playbook_id must be a non-empty string when merge_strategy is not manual",
232            ));
233        }
234        for (i, v) in self.vendors.iter().enumerate() {
235            if v.weight == 0 {
236                return Err(VendorResolverError::invalid_rule(
237                    self.id.clone(),
238                    format!("vendors[{i}].weight"),
239                    v.weight,
240                    "vendor weight must be > 0",
241                ));
242            }
243        }
244        Ok(())
245    }
246
247    /// Vendor list indexed by `VendorId` for fast lookup.
248    #[must_use]
249    pub fn vendors_by_id(&self) -> BTreeMap<VendorId, &VendorRuleMatch> {
250        let mut map: BTreeMap<VendorId, &VendorRuleMatch> = BTreeMap::new();
251        for v in &self.vendors {
252            map.insert(v.vendor, v);
253        }
254        map
255    }
256}
257
258/// Parse a raw TOML payload into a [`ResolutionRule`].
259///
260/// # Errors
261///
262/// Returns [`VendorResolverError`] when the TOML fails to parse,
263/// the declared `[[vendors]]` entries reference an unknown
264/// [`VendorId`], or the resulting [`ResolutionRule`] fails
265/// [`validate`][ResolutionRule::validate].
266pub fn parse_resolution_rule(toml_text: &str) -> Result<ResolutionRule, VendorResolverError> {
267    let rule: ResolutionRule = toml::from_str(toml_text)?;
268    rule.validate()?;
269    Ok(rule)
270}
271
// Test code may panic on failure and index fixtures directly; the
// corresponding lints only matter for production code paths.
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;

    /// Baseline rule that passes validation; each test mutates one
    /// field to trigger a specific failure.
    fn ok_rule() -> ResolutionRule {
        ResolutionRule {
            id: "tier2-hostile".to_string(),
            playbook_id: "tier2-hostile".to_string(),
            target_class: TargetClass::HighSecurity,
            priority: 0,
            merge_strategy: MergeStrategy::StrongestVendor,
            description: "Hostile anti-bot vendors".to_string(),
            min_confidence: 0.60,
            min_score: 5,
            require_unknown_vendor: false,
            vendors: vec![VendorRuleMatch {
                vendor: VendorId::DataDome,
                weight: 10,
            }],
        }
    }

    #[test]
    fn valid_rule_passes_validation() {
        assert!(ok_rule().validate().is_ok());
    }

    #[test]
    fn empty_rule_id_is_rejected() {
        let mut r = ok_rule();
        r.id.clear();
        let err = r.validate().expect_err("empty rule id");
        assert_eq!(err.field_path(), Some("id"));
    }

    #[test]
    fn whitespace_only_rule_id_is_rejected() {
        let mut r = ok_rule();
        r.id = "   ".to_string();
        let err = r.validate().expect_err("blank rule id");
        assert_eq!(err.field_path(), Some("id"));
    }

    #[test]
    fn out_of_range_confidence_is_rejected() {
        let mut r = ok_rule();
        r.min_confidence = 1.5;
        let err = r.validate().expect_err("bad confidence");
        assert_eq!(err.field_path(), Some("min_confidence"));
    }

    #[test]
    fn negative_confidence_is_rejected() {
        let mut r = ok_rule();
        r.min_confidence = -0.1;
        let err = r.validate().expect_err("negative confidence");
        assert_eq!(err.field_path(), Some("min_confidence"));
    }

    #[test]
    fn nan_confidence_is_rejected() {
        let mut r = ok_rule();
        r.min_confidence = f64::NAN;
        let err = r.validate().expect_err("NaN confidence");
        assert_eq!(err.field_path(), Some("min_confidence"));
    }

    #[test]
    fn vendors_required_when_confidence_above_zero() {
        let mut r = ok_rule();
        r.vendors.clear();
        let err = r.validate().expect_err("empty vendors");
        assert_eq!(err.field_path(), Some("vendors"));
    }

    #[test]
    fn manual_strategy_requires_empty_playbook_id() {
        let mut r = ok_rule();
        r.merge_strategy = MergeStrategy::Manual;
        r.playbook_id = "tier2-hostile".to_string();
        let err = r.validate().expect_err("manual w/ playbook_id");
        assert_eq!(err.field_path(), Some("playbook_id"));
    }

    #[test]
    fn manual_strategy_with_empty_playbook_id_passes() {
        let mut r = ok_rule();
        r.merge_strategy = MergeStrategy::Manual;
        r.playbook_id.clear();
        assert!(r.validate().is_ok());
    }

    #[test]
    fn non_manual_strategy_requires_non_empty_playbook_id() {
        let mut r = ok_rule();
        r.playbook_id.clear();
        let err = r.validate().expect_err("non-manual w/ empty playbook_id");
        assert_eq!(err.field_path(), Some("playbook_id"));
    }

    #[test]
    fn zero_weight_vendor_is_rejected() {
        let mut r = ok_rule();
        r.vendors[0].weight = 0;
        let err = r.validate().expect_err("zero weight");
        let path = err.field_path().unwrap_or("");
        assert!(path.contains("vendors[0]"), "got {path}");
    }

    #[test]
    fn merge_strategy_labels_are_stable() {
        assert_eq!(MergeStrategy::StrongestVendor.label(), "strongest_vendor");
        assert_eq!(MergeStrategy::Single.label(), "single");
        assert_eq!(MergeStrategy::Manual.label(), "manual");
    }

    #[test]
    fn parse_round_trip_through_toml() {
        let toml_text = r#"
id = "tier2-hostile"
playbook_id = "tier2-hostile"
target_class = "high_security"
priority = 0
merge_strategy = "strongest_vendor"
description = "Hostile anti-bot vendors"
min_confidence = 0.60
min_score = 5

[[vendors]]
vendor = "datadome"
weight = 10
"#;
        let rule = parse_resolution_rule(toml_text).expect("parse");
        assert_eq!(rule.id, "tier2-hostile");
        assert_eq!(rule.target_class, TargetClass::HighSecurity);
        assert_eq!(rule.vendors.len(), 1);
        assert_eq!(rule.vendors[0].vendor, VendorId::DataDome);
    }

    #[test]
    fn unknown_vendor_label_fails_to_parse() {
        let toml_text = r#"
id = "tier2-hostile"
playbook_id = "tier2-hostile"
target_class = "high_security"
priority = 0
merge_strategy = "strongest_vendor"
min_confidence = 0.60
min_score = 5

[[vendors]]
vendor = "definitely-not-a-vendor"
weight = 10
"#;
        assert!(parse_resolution_rule(toml_text).is_err());
    }

    #[test]
    fn vendors_by_id_groups_correctly() {
        let rule = ResolutionRule {
            id: "x".to_string(),
            playbook_id: "x".to_string(),
            target_class: TargetClass::Unknown,
            priority: 0,
            merge_strategy: MergeStrategy::Single,
            description: String::new(),
            min_confidence: 0.0,
            min_score: 0,
            require_unknown_vendor: false,
            vendors: vec![
                VendorRuleMatch {
                    vendor: VendorId::DataDome,
                    weight: 5,
                },
                VendorRuleMatch {
                    vendor: VendorId::Cloudflare,
                    weight: 7,
                },
            ],
        };
        let map = rule.vendors_by_id();
        assert_eq!(map.get(&VendorId::DataDome).map(|v| v.weight), Some(5));
        assert_eq!(map.get(&VendorId::Cloudflare).map(|v| v.weight), Some(7));
    }

    #[test]
    fn vendors_by_id_keeps_last_duplicate() {
        let mut rule = ok_rule();
        rule.vendors.push(VendorRuleMatch {
            vendor: VendorId::DataDome,
            weight: 3,
        });
        let map = rule.vendors_by_id();
        assert_eq!(map.len(), 1);
        assert_eq!(map.get(&VendorId::DataDome).map(|v| v.weight), Some(3));
    }
}