stygian_charon/vendor_resolver/rules.rs
1//! Resolution rule schema (T90).
2//!
3//! A [`ResolutionRule`] is the **codified policy mapping** from a
4//! set of detected anti-bot vendors to a target playbook. Each
5//! rule carries:
6//!
7//! - the **playbook id** it resolves to (`tier2-hostile`,
8//! `tier1-js`, `tier1-static`, or the sentinel empty string for
9//! the `Manual` strategy marker),
10//! - the **target class** the playbook maps to,
11//! - a **priority** (lower wins) so multi-vendor and conflicting
12//! rule scenarios resolve deterministically,
13//! - the **vendor list** that triggers the rule, with per-vendor
14//! weights so the [`MergeStrategy`] can decide what to do when
15//! more than one listed vendor matched,
16//! - the **confidence/score gates** (`min_confidence`,
17//! `min_score`) the [`crate::vendor_classifier::VendorClassification`]
18//! must cross for the rule to fire, and
19//! - the **merge strategy** the resolver applies when the rule
20//! fires alongside one or more other rules (see the table in
21//! [`crate::vendor_resolver`]).
22//!
23//! ## Multi-vendor merge strategies
24//!
25//! | `MergeStrategy` | Behaviour |
26//! |---------------------|--------------------------------------------------------------------------------------------------|
27//! | `StrongestVendor` | Pick the highest-weight vendor in the rule and resolve with its playbook. |
28//! | `Single` | Pick the single matched vendor (lowest `VendorId` discriminant on ties) and resolve. |
29//! | `Manual` | Defer to manual mode — return the `StrategyMarker::Manual` marker. |
30//!
//! The strategies are documented in the table above; the rules that
//! apply them are shipped as data in
//! `crates/stygian-charon/data/vendor_playbook_rules/`.
33
34use std::collections::BTreeMap;
35
36use serde::{Deserialize, Serialize};
37
38use crate::types::TargetClass;
39use crate::vendor_classifier::VendorId;
40use crate::vendor_resolver::error::VendorResolverError;
41
/// How the resolver should combine multiple matched vendors into
/// a single playbook decision.
///
/// Variants are (de)serialized in `snake_case`
/// (`"strongest_vendor"`, `"single"`, `"manual"`), the same strings
/// [`MergeStrategy::label`] returns.
///
/// See the module-level table for the documented behaviour of each
/// variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MergeStrategy {
    /// Pick the highest-weight vendor in the rule and resolve with
    /// its playbook. "Weight" here is the per-rule
    /// [`VendorRuleMatch::weight`], not the classifier's signal
    /// weight.
    StrongestVendor,
    /// Pick the single matched vendor (lowest `VendorId`
    /// discriminant on ties) and resolve with its playbook.
    Single,
    /// Defer to manual mode — return the
    /// [`StrategyMarker::Manual`][crate::vendor_resolver::StrategyMarker::Manual]
    /// marker so the caller preserves its existing manual mode
    /// selection. Rules using this strategy must leave
    /// `playbook_id` empty (see [`ResolutionRule::validate`]).
    Manual,
}
62
63impl MergeStrategy {
64 /// Stable lower-case wire label.
65 #[must_use]
66 pub const fn label(self) -> &'static str {
67 match self {
68 Self::StrongestVendor => "strongest_vendor",
69 Self::Single => "single",
70 Self::Manual => "manual",
71 }
72 }
73}
74
75/// One vendor entry inside a [`ResolutionRule::vendors`] list.
76///
77/// Each entry pairs a [`VendorId`] with a **rule-weight** (not to
78/// be confused with the classifier's signal weight). The rule-weight
79/// tells the [`MergeStrategy::StrongestVendor`] logic which vendor
80/// dominates when several listed vendors match simultaneously.
81#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
82pub struct VendorRuleMatch {
83 /// Vendor that triggers the rule. The TOML wire format uses
84 /// the [`label`][VendorId::label] (e.g. `"datadome"`,
85 /// `"perimeter_x"`) so the format matches the existing vendor
86 /// classifier TOML definitions rather than the serde
87 /// `snake_case` rename of the enum.
88 #[serde(deserialize_with = "deserialize_vendor_id_from_label")]
89 pub vendor: VendorId,
90 /// Per-rule weight used by
91 /// [`MergeStrategy::StrongestVendor`] when multiple listed
92 /// vendors match. Higher wins.
93 pub weight: u32,
94}
95
96fn deserialize_vendor_id_from_label<'de, D>(deserializer: D) -> Result<VendorId, D::Error>
97where
98 D: serde::Deserializer<'de>,
99{
100 use serde::Deserialize;
101 let label = String::deserialize(deserializer)?;
102 VendorId::from_label(&label)
103 .ok_or_else(|| serde::de::Error::custom(format!("unknown vendor label '{label}'")))
104}
105
/// Single codified rule mapping vendor patterns to a playbook.
///
/// Rules are **ordered by priority** (lower numbers win). When two
/// rules both match a [`crate::vendor_classifier::VendorClassification`]
/// the resolver picks the rule with the lowest `priority` value,
/// then applies its [`merge_strategy`][Self::merge_strategy] to
/// combine any remaining rules into a single decision.
///
/// Construct rules from TOML with [`parse_resolution_rule`], or
/// build them by hand and call [`validate`][Self::validate].
///
/// # Example
///
/// ```
/// use stygian_charon::vendor_resolver::{MergeStrategy, ResolutionRule, VendorRuleMatch};
/// use stygian_charon::types::TargetClass;
/// use stygian_charon::vendor_classifier::VendorId;
///
/// let rule = ResolutionRule {
///     id: "tier2-hostile".to_string(),
///     playbook_id: "tier2-hostile".to_string(),
///     target_class: TargetClass::HighSecurity,
///     priority: 0,
///     merge_strategy: MergeStrategy::StrongestVendor,
///     description: "Hostile anti-bot vendors".to_string(),
///     min_confidence: 0.60,
///     min_score: 5,
///     require_unknown_vendor: false,
///     vendors: vec![VendorRuleMatch {
///         vendor: VendorId::DataDome,
///         weight: 10,
///     }],
/// };
/// assert!(rule.validate().is_ok());
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ResolutionRule {
    /// Stable rule id (`"tier2-hostile"`, `"tier1-js-cloudflare"`,
    /// etc.). Required, non-empty, unique within a resolver bundle.
    /// [`validate`][Self::validate] checks only that it is not
    /// blank; it sees a single rule and cannot check uniqueness.
    pub id: String,
    /// Playbook id the rule resolves to. Empty string means the
    /// `Manual` strategy marker should be returned instead, and is
    /// only accepted together with [`MergeStrategy::Manual`].
    pub playbook_id: String,
    /// Target class the resolved playbook maps to.
    pub target_class: TargetClass,
    /// Priority (lower wins). The baseline rules use `0`
    /// (tier2-hostile), `10` (tier1-js-cloudflare), `100`
    /// (tier1-static), and `1000` (default-manual).
    pub priority: u32,
    /// Merge strategy used when this rule fires alongside other
    /// matching rules.
    pub merge_strategy: MergeStrategy,
    /// Human-readable description for operator logs. Defaults to
    /// the empty string when omitted from the TOML.
    #[serde(default)]
    pub description: String,
    /// Minimum classifier confidence the top vendor must cross for
    /// the rule to fire. Must be in `[0.0, 1.0]`. Defaults to `0.0`
    /// when omitted; any value above `0.0` requires a non-empty
    /// [`vendors`][Self::vendors] list.
    #[serde(default = "default_min_confidence")]
    pub min_confidence: f64,
    /// Minimum classifier score the top vendor must reach for the
    /// rule to fire. Must be `> 0` (or `0` for the catch-all
    /// `default-manual` rule). Defaults to `0` when omitted.
    /// NOTE(review): [`validate`][Self::validate] does not check
    /// this constraint.
    #[serde(default)]
    pub min_score: u32,
    /// When `true`, the rule only fires when the classifier reports
    /// [`VendorId::Unknown`]. Used by the `tier1-static` rule so
    /// benign unknown classifications do not accidentally swallow
    /// single-signal low-confidence matches. Defaults to `false`.
    #[serde(default)]
    pub require_unknown_vendor: bool,
    /// Vendors that trigger the rule. Defaults to an empty list,
    /// which is valid only when `min_confidence` is `0.0`.
    #[serde(default)]
    pub vendors: Vec<VendorRuleMatch>,
}
177
/// Serde fallback for `min_confidence`: a rule that omits the field
/// applies no confidence gate.
const fn default_min_confidence() -> f64 {
    0.0_f64
}
181
182impl ResolutionRule {
183 /// Validate the rule's internal consistency. Reports the first
184 /// failing field with a structured error that includes both the
185 /// rule id and the field path.
186 ///
187 /// # Errors
188 ///
189 /// Returns [`VendorResolverError`] on the first inconsistency.
190 /// The error embeds the **rule id**, the **field path**, and
191 /// the **bad value** so operators can locate the offending
192 /// TOML line without re-running the loader.
193 pub fn validate(&self) -> Result<(), VendorResolverError> {
194 if self.id.trim().is_empty() {
195 return Err(VendorResolverError::invalid_rule(
196 self.id.clone(),
197 "id",
198 self.id.clone(),
199 "rule id must be a non-empty string",
200 ));
201 }
202 if !(0.0..=1.0).contains(&self.min_confidence) {
203 return Err(VendorResolverError::invalid_rule(
204 self.id.clone(),
205 "min_confidence",
206 self.min_confidence,
207 "min_confidence must be in [0.0, 1.0]",
208 ));
209 }
210 if self.min_confidence > 0.0 && self.vendors.is_empty() {
211 return Err(VendorResolverError::invalid_rule(
212 self.id.clone(),
213 "vendors",
214 "<empty>",
215 "vendors list must be non-empty when min_confidence > 0.0",
216 ));
217 }
218 if self.merge_strategy == MergeStrategy::Manual && !self.playbook_id.is_empty() {
219 return Err(VendorResolverError::invalid_rule(
220 self.id.clone(),
221 "playbook_id",
222 self.playbook_id.clone(),
223 "playbook_id must be empty when merge_strategy = manual",
224 ));
225 }
226 if self.merge_strategy != MergeStrategy::Manual && self.playbook_id.is_empty() {
227 return Err(VendorResolverError::invalid_rule(
228 self.id.clone(),
229 "playbook_id",
230 self.playbook_id.clone(),
231 "playbook_id must be a non-empty string when merge_strategy is not manual",
232 ));
233 }
234 for (i, v) in self.vendors.iter().enumerate() {
235 if v.weight == 0 {
236 return Err(VendorResolverError::invalid_rule(
237 self.id.clone(),
238 format!("vendors[{i}].weight"),
239 v.weight,
240 "vendor weight must be > 0",
241 ));
242 }
243 }
244 Ok(())
245 }
246
247 /// Vendor list indexed by `VendorId` for fast lookup.
248 #[must_use]
249 pub fn vendors_by_id(&self) -> BTreeMap<VendorId, &VendorRuleMatch> {
250 let mut map: BTreeMap<VendorId, &VendorRuleMatch> = BTreeMap::new();
251 for v in &self.vendors {
252 map.insert(v.vendor, v);
253 }
254 map
255 }
256}
257
258/// Parse a raw TOML payload into a [`ResolutionRule`].
259///
260/// # Errors
261///
262/// Returns [`VendorResolverError`] when the TOML fails to parse,
263/// the declared `[[vendors]]` entries reference an unknown
264/// [`VendorId`], or the resulting [`ResolutionRule`] fails
265/// [`validate`][ResolutionRule::validate].
266pub fn parse_resolution_rule(toml_text: &str) -> Result<ResolutionRule, VendorResolverError> {
267 let rule: ResolutionRule = toml::from_str(toml_text)?;
268 rule.validate()?;
269 Ok(rule)
270}
271
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;

    /// Shorthand for one `[[vendors]]` entry.
    fn entry(vendor: VendorId, weight: u32) -> VendorRuleMatch {
        VendorRuleMatch { vendor, weight }
    }

    /// Baseline rule that passes validation; each negative test
    /// overrides exactly the fields it wants to break.
    fn ok_rule() -> ResolutionRule {
        ResolutionRule {
            id: "tier2-hostile".to_string(),
            playbook_id: "tier2-hostile".to_string(),
            target_class: TargetClass::HighSecurity,
            priority: 0,
            merge_strategy: MergeStrategy::StrongestVendor,
            description: "Hostile anti-bot vendors".to_string(),
            min_confidence: 0.60,
            min_score: 5,
            require_unknown_vendor: false,
            vendors: vec![entry(VendorId::DataDome, 10)],
        }
    }

    #[test]
    fn valid_rule_passes_validation() {
        let rule = ok_rule();
        assert!(rule.validate().is_ok());
    }

    #[test]
    fn empty_rule_id_is_rejected() {
        let rule = ResolutionRule {
            id: String::new(),
            ..ok_rule()
        };
        let err = rule.validate().expect_err("empty rule id");
        assert_eq!(err.field_path(), Some("id"));
    }

    #[test]
    fn out_of_range_confidence_is_rejected() {
        let rule = ResolutionRule {
            min_confidence: 1.5,
            ..ok_rule()
        };
        let err = rule.validate().expect_err("bad confidence");
        assert_eq!(err.field_path(), Some("min_confidence"));
    }

    #[test]
    fn vendors_required_when_confidence_above_zero() {
        let rule = ResolutionRule {
            vendors: Vec::new(),
            ..ok_rule()
        };
        let err = rule.validate().expect_err("empty vendors");
        assert_eq!(err.field_path(), Some("vendors"));
    }

    #[test]
    fn manual_strategy_requires_empty_playbook_id() {
        let rule = ResolutionRule {
            merge_strategy: MergeStrategy::Manual,
            playbook_id: "tier2-hostile".to_string(),
            ..ok_rule()
        };
        let err = rule.validate().expect_err("manual w/ playbook_id");
        assert_eq!(err.field_path(), Some("playbook_id"));
    }

    #[test]
    fn non_manual_strategy_requires_non_empty_playbook_id() {
        let rule = ResolutionRule {
            playbook_id: String::new(),
            ..ok_rule()
        };
        let err = rule
            .validate()
            .expect_err("non-manual w/ empty playbook_id");
        assert_eq!(err.field_path(), Some("playbook_id"));
    }

    #[test]
    fn zero_weight_vendor_is_rejected() {
        let rule = ResolutionRule {
            vendors: vec![entry(VendorId::DataDome, 0)],
            ..ok_rule()
        };
        let err = rule.validate().expect_err("zero weight");
        let path = err.field_path().unwrap_or("");
        assert!(path.contains("vendors[0]"), "got {path}");
    }

    #[test]
    fn merge_strategy_labels_are_stable() {
        let expected = [
            (MergeStrategy::StrongestVendor, "strongest_vendor"),
            (MergeStrategy::Single, "single"),
            (MergeStrategy::Manual, "manual"),
        ];
        for (strategy, label) in expected {
            assert_eq!(strategy.label(), label);
        }
    }

    #[test]
    fn parse_round_trip_through_toml() {
        let toml_text = r#"
id = "tier2-hostile"
playbook_id = "tier2-hostile"
target_class = "high_security"
priority = 0
merge_strategy = "strongest_vendor"
description = "Hostile anti-bot vendors"
min_confidence = 0.60
min_score = 5

[[vendors]]
vendor = "datadome"
weight = 10
"#;
        let parsed = parse_resolution_rule(toml_text).expect("parse");
        assert_eq!(parsed.id, "tier2-hostile");
        assert_eq!(parsed.target_class, TargetClass::HighSecurity);
        assert_eq!(parsed.vendors.len(), 1);
        assert_eq!(parsed.vendors[0].vendor, VendorId::DataDome);
    }

    #[test]
    fn vendors_by_id_groups_correctly() {
        let rule = ResolutionRule {
            id: "x".to_string(),
            playbook_id: "x".to_string(),
            target_class: TargetClass::Unknown,
            priority: 0,
            merge_strategy: MergeStrategy::Single,
            description: String::new(),
            min_confidence: 0.0,
            min_score: 0,
            require_unknown_vendor: false,
            vendors: vec![
                entry(VendorId::DataDome, 5),
                entry(VendorId::Cloudflare, 7),
            ],
        };
        let by_id = rule.vendors_by_id();
        let weight_of = |id: VendorId| by_id.get(&id).map(|m| m.weight);
        assert_eq!(weight_of(VendorId::DataDome), Some(5));
        assert_eq!(weight_of(VendorId::Cloudflare), Some(7));
    }
}