Skip to main content

stygian_charon/challenge_feedback/
memory.rs

1use std::num::NonZeroUsize;
2use std::time::{Duration, SystemTime, UNIX_EPOCH};
3
4use serde::{Deserialize, Serialize};
5
6use crate::cache::LruTtlStore;
7use crate::challenge_feedback::ChallengeOutcome;
8use crate::types::TargetClass;
9
/// Default TTL for the challenge memory: **10 minutes**.
///
/// This is short enough that one-off escalations decay quickly (so a
/// single transient captcha does not poison the policy for hours)
/// and long enough to span a typical scraping session that might
/// retry the same domain several times before the operator decides
/// to back off entirely.
///
/// Used by [`ChallengeMemory::with_default_ttl`] and
/// [`ChallengeMemory::with_defaults`].
pub const DEFAULT_CHALLENGE_TTL: Duration = Duration::from_mins(10);
18
/// Default capacity (in `(domain, target_class)` entries) for the
/// challenge memory: **64 entries**. Conservative default — most
/// workflows touch only a handful of distinct target classes.
///
/// The value is built with a `match` instead of `unwrap()`, so no lint
/// exemption is needed. The `None` arm can never be taken because `64`
/// is non-zero; it exists only to keep the expression total in a
/// `const` context.
pub const DEFAULT_CHALLENGE_CAPACITY: NonZeroUsize = match NonZeroUsize::new(64) {
    Some(value) => value,
    None => NonZeroUsize::MIN,
};
27
/// Fallback value for `recorded_at_unix_secs` when the wall clock
/// cannot be expressed as a duration since the Unix epoch, i.e. the
/// system clock reads earlier than the epoch.
///
/// Zero remains a valid serialised timestamp, yet it is readily
/// distinguishable from any real present-day recording time.
const ZERO_FALLBACK_UNIX_SECS: u64 = 0;
33
34/// Build a stable, lower-cased cache key for the challenge memory
35/// entry keyed by `(domain, target_class)`.
36///
37/// # Example
38///
39/// ```
40/// use stygian_charon::challenge_feedback::challenge_memory_key;
41/// use stygian_charon::types::TargetClass;
42///
43/// let key = challenge_memory_key("Example.COM", TargetClass::Api);
44/// assert!(key.starts_with("charon:challenge:example.com:"));
45/// ```
46#[must_use]
47pub fn challenge_memory_key(domain: &str, target_class: TargetClass) -> String {
48    format!(
49        "charon:challenge:{}:{}",
50        domain.to_ascii_lowercase(),
51        target_class_label(target_class)
52    )
53}
54
55const fn target_class_label(c: TargetClass) -> &'static str {
56    match c {
57        TargetClass::Api => "api",
58        TargetClass::ContentSite => "content_site",
59        TargetClass::HighSecurity => "high_security",
60        TargetClass::Unknown => "unknown",
61    }
62}
63
/// One entry in the challenge memory.
///
/// An entry represents the **last observed** outcome for a single
/// `(domain, target_class)` pair, along with a count of how many
/// times the runner has recorded an outcome for that key (the counter
/// saturates at `u32::MAX` instead of wrapping). The TTL is owned by
/// the `LruTtlStore` backing the
/// [`ChallengeMemory`] — once the LRU entry expires, the whole
/// entry is dropped and the runner falls back to the unadjusted
/// risk score.
///
/// # Example
///
/// ```
/// use stygian_charon::challenge_feedback::{ChallengeMemoryEntry, ChallengeOutcome};
/// use stygian_charon::types::TargetClass;
///
/// let entry = ChallengeMemoryEntry {
///     domain: "example.com".to_string(),
///     target_class: TargetClass::ContentSite,
///     last_outcome: ChallengeOutcome::HardChallenge,
///     observation_count: 1,
///     recorded_at_unix_secs: 1_700_000_000,
/// };
/// assert_eq!(entry.risk_delta(), ChallengeOutcome::HardChallenge.risk_delta());
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ChallengeMemoryEntry {
    /// Lower-cased host the outcome was recorded for.
    /// [`ChallengeMemory::record`] ASCII-lower-cases the domain before
    /// storing it; the struct itself does not enforce this for values
    /// constructed directly.
    pub domain: String,
    /// Target class the outcome was recorded for.
    pub target_class: TargetClass,
    /// Most recently recorded outcome for this key. This is the only
    /// field that feeds [`ChallengeMemoryEntry::risk_delta`].
    pub last_outcome: ChallengeOutcome,
    /// Number of outcomes the runner has recorded for this key
    /// (saturating on overflow).
    pub observation_count: u32,
    /// Unix epoch seconds when the entry was last updated. Entries
    /// written by [`ChallengeMemory::record`] carry `0` here when the
    /// system clock reads earlier than the Unix epoch.
    pub recorded_at_unix_secs: u64,
}
104
impl ChallengeMemoryEntry {
    /// Risk-score contribution this entry would add to the next
    /// policy. Delegates to
    /// [`ChallengeOutcome::risk_delta`][crate::challenge_feedback::ChallengeOutcome::risk_delta]
    /// and is therefore bounded by
    /// [`MAX_RISK_DELTA`][crate::challenge_feedback::MAX_RISK_DELTA].
    ///
    /// Only `last_outcome` is consulted; `observation_count` and
    /// `recorded_at_unix_secs` do not influence the result.
    #[must_use]
    pub const fn risk_delta(&self) -> f64 {
        self.last_outcome.risk_delta()
    }
}
116
/// Short-horizon, capacity-bounded LRU memory of challenge outcomes
/// keyed by `(domain, target_class)`.
///
/// The store reuses the same `LruTtlStore`
/// primitive that backs the investigation report cache. That keeps
/// eviction + expiry semantics consistent across both caches and
/// satisfies the "no new cache store" requirement.
///
/// Every method takes `&self`, including the ones that write to the
/// store.
///
/// # Example
///
/// ```
/// use stygian_charon::challenge_feedback::{ChallengeMemory, ChallengeOutcome};
/// use stygian_charon::types::TargetClass;
/// use std::num::NonZeroUsize;
/// use std::time::Duration;
///
/// let memory =
///     ChallengeMemory::new(NonZeroUsize::new(8).expect("non-zero"), Duration::from_mins(5));
/// memory.record("example.com", TargetClass::ContentSite, ChallengeOutcome::Captcha);
/// let entry = memory.lookup("example.com", TargetClass::ContentSite).expect("entry");
/// assert_eq!(entry.last_outcome, ChallengeOutcome::Captcha);
/// assert_eq!(entry.observation_count, 1);
/// ```
pub struct ChallengeMemory {
    // Backing store; keys are the strings produced by
    // `challenge_memory_key`.
    store: LruTtlStore<ChallengeMemoryEntry>,
}
143
144impl ChallengeMemory {
145    /// Create a new challenge memory with explicit capacity and TTL.
146    #[must_use]
147    pub fn new(capacity: NonZeroUsize, ttl: Duration) -> Self {
148        Self {
149            store: LruTtlStore::new(capacity, ttl),
150        }
151    }
152
153    /// Create a new challenge memory with
154    /// `DEFAULT_CHALLENGE_CAPACITY` and
155    /// `DEFAULT_CHALLENGE_TTL`.
156    #[must_use]
157    pub fn with_default_ttl(capacity: NonZeroUsize) -> Self {
158        Self::new(capacity, DEFAULT_CHALLENGE_TTL)
159    }
160
161    /// Capacity-bounded [`ChallengeMemory`] with the default
162    /// capacity and TTL.
163    #[must_use]
164    pub fn with_defaults() -> Self {
165        Self::new(DEFAULT_CHALLENGE_CAPACITY, DEFAULT_CHALLENGE_TTL)
166    }
167
168    /// Record a challenge outcome for a `(domain, target_class)`
169    /// key. Replaces the existing entry (if any) and increments the
170    /// observation counter atomically with the read-modify-write
171    /// sequence. Lower-cases the domain for stable keying.
172    ///
173    /// # Example
174    ///
175    /// ```
176    /// use stygian_charon::challenge_feedback::{ChallengeMemory, ChallengeOutcome};
177    /// use stygian_charon::types::TargetClass;
178    ///
179    /// let memory = ChallengeMemory::with_defaults();
180    /// memory.record("Example.COM", TargetClass::Api, ChallengeOutcome::Pass);
181    /// let entry = memory.lookup("example.com", TargetClass::Api).unwrap();
182    /// assert_eq!(entry.last_outcome, ChallengeOutcome::Pass);
183    /// assert_eq!(entry.observation_count, 1);
184    /// ```
185    pub fn record(&self, domain: &str, target_class: TargetClass, outcome: ChallengeOutcome) {
186        let key = challenge_memory_key(domain, target_class);
187        let lower = domain.to_ascii_lowercase();
188        let next_count = self
189            .store
190            .peek(&key)
191            .map_or(1, |existing| existing.observation_count.saturating_add(1));
192        let entry = ChallengeMemoryEntry {
193            domain: lower,
194            target_class,
195            last_outcome: outcome,
196            observation_count: next_count,
197            recorded_at_unix_secs: current_unix_secs(),
198        };
199        self.store.put(key, entry);
200    }
201
202    /// Look up the current entry for a `(domain, target_class)` key.
203    /// Returns `None` if the key is absent or has expired.
204    ///
205    /// # Example
206    ///
207    /// ```
208    /// use stygian_charon::challenge_feedback::ChallengeMemory;
209    /// use stygian_charon::types::TargetClass;
210    ///
211    /// let memory = ChallengeMemory::with_defaults();
212    /// assert!(memory.lookup("nope.example", TargetClass::Api).is_none());
213    /// ```
214    #[must_use]
215    pub fn lookup(&self, domain: &str, target_class: TargetClass) -> Option<ChallengeMemoryEntry> {
216        self.store.get(&challenge_memory_key(domain, target_class))
217    }
218
219    /// Number of entries currently retained.
220    #[must_use]
221    pub fn len(&self) -> usize {
222        self.store.len()
223    }
224
225    /// `true` if the memory has zero entries.
226    #[must_use]
227    pub fn is_empty(&self) -> bool {
228        self.store.is_empty()
229    }
230
231    /// Remove all entries.
232    pub fn clear(&self) {
233        self.store.clear();
234    }
235
236    /// Invalidate a single `(domain, target_class)` key.
237    pub fn invalidate(&self, domain: &str, target_class: TargetClass) {
238        self.store
239            .invalidate(&challenge_memory_key(domain, target_class));
240    }
241}
242
243fn current_unix_secs() -> u64 {
244    SystemTime::now()
245        .duration_since(UNIX_EPOCH)
246        .map_or(ZERO_FALLBACK_UNIX_SECS, |duration| duration.as_secs())
247}
248
#[cfg(test)]
// Panicking helpers are fine in tests: a failed `unwrap`/`expect` is
// itself the failure signal.
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;
    use std::thread;

    /// Memory with the given capacity and a one-minute TTL, long enough
    /// that nothing expires during a test.
    fn memory_with_capacity(capacity: usize) -> ChallengeMemory {
        ChallengeMemory::new(NonZeroUsize::new(capacity).unwrap(), Duration::from_mins(1))
    }

    #[test]
    fn record_overwrites_last_outcome_and_increments_count() {
        let memory = memory_with_capacity(4);
        let (host, class) = ("example.com", TargetClass::ContentSite);

        for outcome in [
            ChallengeOutcome::Pass,
            ChallengeOutcome::HardChallenge,
            ChallengeOutcome::Captcha,
        ] {
            memory.record(host, class, outcome);
        }

        let stored = memory.lookup(host, class).expect("entry present");
        assert_eq!(stored.last_outcome, ChallengeOutcome::Captcha);
        assert_eq!(stored.observation_count, 3);
        assert_eq!(stored.domain, "example.com");
        assert_eq!(stored.target_class, TargetClass::ContentSite);
    }

    #[test]
    fn entries_decay_after_ttl() {
        // A 1 ms TTL followed by a 5 ms sleep guarantees the entry is stale.
        let short_lived =
            ChallengeMemory::new(NonZeroUsize::new(4).unwrap(), Duration::from_millis(1));
        short_lived.record("example.com", TargetClass::Api, ChallengeOutcome::Blocked);
        thread::sleep(Duration::from_millis(5));
        let found = short_lived.lookup("example.com", TargetClass::Api);
        assert!(found.is_none());
    }

    #[test]
    fn distinct_target_classes_keep_distinct_entries() {
        let memory = memory_with_capacity(8);
        let host = "example.com";

        memory.record(host, TargetClass::Api, ChallengeOutcome::Pass);
        memory.record(host, TargetClass::ContentSite, ChallengeOutcome::Captcha);

        let api_entry = memory.lookup(host, TargetClass::Api).unwrap();
        let content_entry = memory.lookup(host, TargetClass::ContentSite).unwrap();

        assert_eq!(api_entry.last_outcome, ChallengeOutcome::Pass);
        assert_eq!(content_entry.last_outcome, ChallengeOutcome::Captcha);
    }

    #[test]
    fn clear_drops_everything() {
        let memory = memory_with_capacity(4);
        memory.record("example.com", TargetClass::Api, ChallengeOutcome::Pass);
        memory.record("other.example", TargetClass::Api, ChallengeOutcome::Blocked);
        assert_eq!(memory.len(), 2);

        memory.clear();
        assert!(memory.is_empty());
    }

    #[test]
    fn domain_is_normalised_to_lower_case() {
        let memory = memory_with_capacity(4);
        memory.record("Example.COM", TargetClass::Api, ChallengeOutcome::SoftChallenge);

        // Look up with yet another casing to prove the key is case-insensitive.
        let stored = memory.lookup("EXAMPLE.com", TargetClass::Api).unwrap();
        assert_eq!(stored.domain, "example.com");
        assert_eq!(stored.last_outcome, ChallengeOutcome::SoftChallenge);
    }

    #[test]
    fn risk_delta_uses_last_outcome() {
        let memory = memory_with_capacity(4);
        memory.record("example.com", TargetClass::Api, ChallengeOutcome::HardChallenge);

        let stored = memory.lookup("example.com", TargetClass::Api).unwrap();
        let expected = ChallengeOutcome::HardChallenge.risk_delta();
        let difference = (stored.risk_delta() - expected).abs();
        assert!(difference < 1e-9);
    }

    #[test]
    fn lru_capacity_is_respected() {
        let memory = memory_with_capacity(2);
        for host in ["a.example", "b.example", "c.example"] {
            memory.record(host, TargetClass::Api, ChallengeOutcome::Pass);
        }
        assert!(memory.len() <= 2);
    }
}