//! stygian_charon/adaptive.rs — adaptive SLO policy with per-target history.
use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::{Mutex, PoisonError};
use std::time::{SystemTime, UNIX_EPOCH};

use serde::{Deserialize, Serialize};
use thiserror::Error;

use crate::types::{BlockedRatioSlo, RequirementLevel, TargetClass};
11
/// Errors returned by adaptive SLO policy operations.
///
/// Store-related variants carry the path of the offending file so operators
/// can locate the broken store.
#[derive(Debug, Error)]
pub enum AdaptivePolicyError {
    /// The persisted history file could not be read.
    #[error("failed to read adaptive policy store '{path}': {source}")]
    ReadStore {
        /// Path of the failing store.
        path: PathBuf,
        /// Source I/O error.
        source: std::io::Error,
    },
    /// The persisted history file could not be parsed.
    #[error("failed to parse adaptive policy store '{path}': {source}")]
    ParseStore {
        /// Path of the failing store.
        path: PathBuf,
        /// Source JSON parsing error.
        source: serde_json::Error,
    },
    /// The history store could not be written.
    #[error("failed to write adaptive policy store '{path}': {source}")]
    WriteStore {
        /// Path of the failing store.
        path: PathBuf,
        /// Source I/O error.
        source: std::io::Error,
    },
    /// In-memory history could not be serialized.
    #[error("failed to serialize adaptive policy history: {0}")]
    Serialize(serde_json::Error),
}
43
/// Pluggable adaptive SLO policy interface.
///
/// Implementations must be `Send + Sync` so a single policy instance can be
/// shared across worker threads.
///
/// # Example
///
/// ```no_run
/// use stygian_charon::{AdaptiveSloPolicy, BlockedRatioSlo, RegressionHistoryPolicy, RequirementLevel, TargetClass};
///
/// # fn run() -> Result<(), Box<dyn std::error::Error>> {
/// let policy = RegressionHistoryPolicy::new();
/// policy.record_observation(
///     "https://example.com",
///     TargetClass::ContentSite,
///     0.18,
///     RequirementLevel::Medium,
/// )?;
///
/// let adapted = policy.select_slo(
///     "https://example.com",
///     TargetClass::ContentSite,
///     BlockedRatioSlo::content_site(),
/// );
/// assert!(adapted.acceptable <= adapted.warning);
/// # Ok(())
/// # }
/// ```
pub trait AdaptiveSloPolicy: Send + Sync {
    /// Select an adjusted SLO for a target using historical observations.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use stygian_charon::{AdaptiveSloPolicy, BlockedRatioSlo, RegressionHistoryPolicy, TargetClass};
    ///
    /// let policy = RegressionHistoryPolicy::new();
    /// let slo = policy.select_slo(
    ///     "https://example.com",
    ///     TargetClass::Api,
    ///     BlockedRatioSlo::api(),
    /// );
    /// assert!(slo.acceptable <= slo.warning);
    /// ```
    fn select_slo(
        &self,
        target: &str,
        target_class: TargetClass,
        default: BlockedRatioSlo,
    ) -> BlockedRatioSlo;

    /// Record a new blocked-ratio observation for a target.
    ///
    /// # Errors
    ///
    /// Returns [`AdaptivePolicyError`] when persistence fails.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use stygian_charon::{AdaptiveSloPolicy, RegressionHistoryPolicy, RequirementLevel, TargetClass};
    ///
    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
    /// let policy = RegressionHistoryPolicy::new();
    /// policy.record_observation(
    ///     "https://example.com",
    ///     TargetClass::Api,
    ///     0.04,
    ///     RequirementLevel::Low,
    /// )?;
    /// # Ok(())
    /// # }
    /// ```
    fn record_observation(
        &self,
        target: &str,
        target_class: TargetClass,
        blocked_ratio: f64,
        escalation_level: RequirementLevel,
    ) -> Result<(), AdaptivePolicyError>;
}
122
/// A single persisted blocked-ratio sample for one target.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct TargetObservation {
    /// Class of the target at the time the sample was recorded.
    target_class: TargetClass,
    /// Blocked-ratio sample; clamped to `[0.0, 1.0]` on ingest.
    blocked_ratio: f64,
    /// Escalation level in effect when the sample was taken.
    escalation_level: RequirementLevel,
    /// Capture time in seconds since the Unix epoch (0 if the clock reads
    /// before the epoch).
    observed_at_unix_secs: u64,
}
130
/// Per-class limits on how far adapted thresholds may drift from defaults.
#[derive(Debug, Clone)]
struct AdaptiveBounds {
    /// Lowest allowed `acceptable` threshold after adaptation.
    min_acceptable: f64,
    /// Highest allowed `acceptable` threshold after adaptation.
    max_acceptable: f64,
    /// Maximum absolute shift applied to the default thresholds.
    max_shift: f64,
    /// Minimum gap kept between `acceptable` and `warning`.
    min_warning_gap: f64,
    /// Minimum gap kept between `warning` and `critical`.
    min_critical_gap: f64,
}
139
140impl AdaptiveBounds {
141    const fn for_class(target_class: TargetClass) -> Self {
142        match target_class {
143            TargetClass::Api | TargetClass::Unknown => Self {
144                min_acceptable: 0.01,
145                max_acceptable: 0.20,
146                max_shift: 0.08,
147                min_warning_gap: 0.03,
148                min_critical_gap: 0.03,
149            },
150            TargetClass::ContentSite => Self {
151                min_acceptable: 0.05,
152                max_acceptable: 0.35,
153                max_shift: 0.12,
154                min_warning_gap: 0.05,
155                min_critical_gap: 0.06,
156            },
157            TargetClass::HighSecurity => Self {
158                min_acceptable: 0.15,
159                max_acceptable: 0.55,
160                max_shift: 0.15,
161                min_warning_gap: 0.08,
162                min_critical_gap: 0.08,
163            },
164        }
165    }
166}
167
/// Adaptive SLO policy backed by per-target blocked-ratio history.
///
/// Uses bounded threshold shifts around default class SLOs. History can be
/// persisted to JSON for process restarts and operator inspection.
#[derive(Debug)]
pub struct RegressionHistoryPolicy {
    /// JSON store location; `None` keeps history purely in memory.
    store_path: Option<PathBuf>,
    /// Per-target observation history, keyed by target identifier.
    history: Mutex<BTreeMap<String, Vec<TargetObservation>>>,
    /// Cap on retained observations per target; oldest samples are evicted
    /// first once the cap is exceeded.
    max_observations_per_target: usize,
}
178
179impl RegressionHistoryPolicy {
180    /// Create an in-memory adaptive policy with no persistence.
181    ///
182    /// # Example
183    ///
184    /// ```
185    /// use stygian_charon::RegressionHistoryPolicy;
186    ///
187    /// let policy = RegressionHistoryPolicy::new();
188    /// assert!(policy.tracked_target_count() == 0);
189    /// ```
190    #[must_use]
191    pub const fn new() -> Self {
192        Self {
193            store_path: None,
194            history: Mutex::new(BTreeMap::new()),
195            max_observations_per_target: 256,
196        }
197    }
198
199    /// Create an adaptive policy that persists history to a JSON file.
200    ///
201    /// If the file exists, it is loaded on startup.
202    ///
203    /// # Errors
204    ///
205    /// Returns [`AdaptivePolicyError`] when loading the store fails.
206    ///
207    /// # Example
208    ///
209    /// ```no_run
210    /// use stygian_charon::RegressionHistoryPolicy;
211    ///
212    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
213    /// let _policy = RegressionHistoryPolicy::with_json_store("./charon-history.json")?;
214    /// # Ok(())
215    /// # }
216    /// ```
217    pub fn with_json_store(path: impl AsRef<Path>) -> Result<Self, AdaptivePolicyError> {
218        let path_buf = path.as_ref().to_path_buf();
219        let history = if path_buf.exists() {
220            let content =
221                fs::read_to_string(&path_buf).map_err(|source| AdaptivePolicyError::ReadStore {
222                    path: path_buf.clone(),
223                    source,
224                })?;
225            serde_json::from_str::<BTreeMap<String, Vec<TargetObservation>>>(&content).map_err(
226                |source| AdaptivePolicyError::ParseStore {
227                    path: path_buf.clone(),
228                    source,
229                },
230            )?
231        } else {
232            BTreeMap::new()
233        };
234
235        Ok(Self {
236            store_path: Some(path_buf),
237            history: Mutex::new(history),
238            max_observations_per_target: 256,
239        })
240    }
241
242    /// Number of unique targets tracked by history.
243    #[must_use]
244    pub fn tracked_target_count(&self) -> usize {
245        let Ok(history) = self.history.lock() else {
246            return 0;
247        };
248        history.len()
249    }
250
251    /// Number of observations currently retained for one target.
252    #[must_use]
253    pub fn observations_for_target(&self, target: &str) -> usize {
254        let Ok(history) = self.history.lock() else {
255            return 0;
256        };
257        history.get(target).map_or(0, Vec::len)
258    }
259
260    fn persist_locked(
261        &self,
262        history: &BTreeMap<String, Vec<TargetObservation>>,
263    ) -> Result<(), AdaptivePolicyError> {
264        let Some(path) = &self.store_path else {
265            return Ok(());
266        };
267
268        let serialized =
269            serde_json::to_string_pretty(history).map_err(AdaptivePolicyError::Serialize)?;
270        fs::write(path, serialized).map_err(|source| AdaptivePolicyError::WriteStore {
271            path: path.clone(),
272            source,
273        })
274    }
275
276    fn avg_blocked_ratio(observations: &[TargetObservation]) -> Option<f64> {
277        if observations.is_empty() {
278            return None;
279        }
280
281        let sum = observations.iter().map(|o| o.blocked_ratio).sum::<f64>();
282        let count = observations.len();
283        let Ok(count_u32) = u32::try_from(count) else {
284            return None;
285        };
286
287        Some(sum / f64::from(count_u32))
288    }
289
290    const fn clamp_unit(value: f64) -> f64 {
291        if value < 0.0 {
292            0.0
293        } else if value > 1.0 {
294            1.0
295        } else {
296            value
297        }
298    }
299}
300
301impl Default for RegressionHistoryPolicy {
302    fn default() -> Self {
303        Self::new()
304    }
305}
306
307impl AdaptiveSloPolicy for RegressionHistoryPolicy {
308    fn select_slo(
309        &self,
310        target: &str,
311        target_class: TargetClass,
312        default: BlockedRatioSlo,
313    ) -> BlockedRatioSlo {
314        let Ok(history) = self.history.lock() else {
315            return default;
316        };
317
318        let Some(observations) = history.get(target) else {
319            return default;
320        };
321
322        let Some(avg_ratio) = Self::avg_blocked_ratio(observations) else {
323            return default;
324        };
325
326        // Require at least three samples before adapting thresholds.
327        if observations.len() < 3 {
328            return default;
329        }
330
331        let bounds = AdaptiveBounds::for_class(target_class);
332        let shift = (avg_ratio - default.acceptable).clamp(-bounds.max_shift, bounds.max_shift);
333
334        let acceptable =
335            (default.acceptable + shift).clamp(bounds.min_acceptable, bounds.max_acceptable);
336        let warning = (default.warning + shift)
337            .max(acceptable + bounds.min_warning_gap)
338            .min(0.95);
339        let critical = (default.critical + shift)
340            .max(warning + bounds.min_critical_gap)
341            .min(0.99);
342
343        BlockedRatioSlo {
344            target_class,
345            acceptable: Self::clamp_unit(acceptable),
346            warning: Self::clamp_unit(warning),
347            critical: Self::clamp_unit(critical),
348        }
349    }
350
351    fn record_observation(
352        &self,
353        target: &str,
354        target_class: TargetClass,
355        blocked_ratio: f64,
356        escalation_level: RequirementLevel,
357    ) -> Result<(), AdaptivePolicyError> {
358        let Ok(mut history) = self.history.lock() else {
359            return Ok(());
360        };
361
362        let entry = history.entry(target.to_string()).or_default();
363        entry.push(TargetObservation {
364            target_class,
365            blocked_ratio: Self::clamp_unit(blocked_ratio),
366            escalation_level,
367            observed_at_unix_secs: SystemTime::now()
368                .duration_since(UNIX_EPOCH)
369                .map_or(0, |duration| duration.as_secs()),
370        });
371
372        if entry.len() > self.max_observations_per_target {
373            let overflow = entry.len() - self.max_observations_per_target;
374            entry.drain(0..overflow);
375        }
376
377        self.persist_locked(&history)
378    }
379}
380
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn regression_history_tracks_at_least_ten_targets() {
        let policy = RegressionHistoryPolicy::new();

        // One observation for each of ten distinct targets.
        for index in 0_u32..10 {
            let target = format!("https://example{index}.com");
            let outcome = policy.record_observation(
                &target,
                TargetClass::ContentSite,
                0.10,
                RequirementLevel::Low,
            );
            assert!(outcome.is_ok(), "record_observation should succeed");
        }

        assert!(policy.tracked_target_count() >= 10);
    }

    #[test]
    fn adaptive_thresholds_preserve_zone_ordering() {
        let policy = RegressionHistoryPolicy::new();
        let target = "https://content.example";

        // Four samples clears the three-sample adaptation threshold.
        let samples = [0.20, 0.22, 0.24, 0.26];
        for ratio in samples {
            let outcome = policy.record_observation(
                target,
                TargetClass::ContentSite,
                ratio,
                RequirementLevel::Medium,
            );
            assert!(outcome.is_ok(), "record_observation should succeed");
        }

        let adjusted = policy.select_slo(
            target,
            TargetClass::ContentSite,
            BlockedRatioSlo::content_site(),
        );

        // Zones must stay ordered and within class bounds.
        assert!(adjusted.acceptable <= adjusted.warning);
        assert!(adjusted.warning <= adjusted.critical);
        assert!(adjusted.acceptable >= 0.05);
        assert!(adjusted.critical <= 0.99);
    }

    #[test]
    fn json_store_round_trips_history() {
        // Nanosecond timestamp keeps the temp path collision-free.
        let unique = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map_or(0, |duration| duration.as_nanos());
        let store = std::env::temp_dir().join(format!("adaptive-history-{unique}.json"));

        let created = RegressionHistoryPolicy::with_json_store(&store);
        assert!(created.is_ok(), "policy should initialize with store");
        let Ok(policy) = created else {
            return;
        };

        let written = policy.record_observation(
            "https://api.example",
            TargetClass::Api,
            0.08,
            RequirementLevel::Medium,
        );
        assert!(written.is_ok(), "record_observation should persist");

        // A fresh policy pointed at the same store must see the history.
        let reloaded_outcome = RegressionHistoryPolicy::with_json_store(&store);
        assert!(reloaded_outcome.is_ok(), "policy should reload store");
        let Ok(reloaded) = reloaded_outcome else {
            return;
        };

        assert_eq!(reloaded.tracked_target_count(), 1);
        assert_eq!(reloaded.observations_for_target("https://api.example"), 1);

        let _ = fs::remove_file(store);
    }
}
462}