Skip to main content

stygian_browser/interstitial_router/
router.rs

1//! Dedicated acquisition router for classified interstitials.
2//!
3//! Consumes an [`InterstitialKind`] (from
4//! [`InterstitialClassifier`][super::classifier::InterstitialClassifier])
5//! plus a [`PageSignature`][super::PageSignature] and
6//! returns a structured [`RouterDecision`] with the
7//! dedicated route, the dedicated severity tier, and the
8//! per-signature evidence.
9//!
10//! The router is a pure function — no I/O, no clock reads
11//! (the timestamp is captured at the
12//! [`RouterDecision::new`] boundary). The router
13//! composes with the classifier via
14//! [`classify_and_route`] (the most common one-shot
15//! helper) and the lower-level [`route`] (when the caller
16//! already classified).
17//!
18//! # Example
19//!
20//! ```
21//! use stygian_browser::interstitial_router::{
22//!     InterstitialKind, InterstitialRouter, InterstitialSeverity, PageSignature,
23//! };
24//!
25//! let router = InterstitialRouter::with_defaults();
26//! let sig = PageSignature::new("https://example.com/blocked", Some(403))
27//!     .with_body_marker("access denied");
28//! let decision = router.route(&sig, InterstitialKind::HardBlock);
29//! assert_eq!(decision.severity(), InterstitialSeverity::Terminal);
30//! assert!(decision.is_terminal());
31//! ```
32
33use crate::acquisition::StrategyUsed;
34
35use super::classifier::{InterstitialClassifier, PageSignature};
36use super::policy::{InterstitialKind, InterstitialPolicy, InterstitialRoute};
37use super::report::{PageSignatureEvidence, RouterDecision};
38
/// Dedicated acquisition router for classified interstitials.
///
/// `InterstitialRouter` is constructed once with an
/// [`InterstitialPolicy`] and is safe to share across
/// threads (the policy is immutable; the classifier is
/// stateless).
///
/// The router pairs a classifier (used by
/// `classify_and_route`) with the policy whose values are
/// copied into every route it builds.
///
/// # Example
///
/// ```
/// use stygian_browser::interstitial_router::{
///     InterstitialKind, InterstitialRouter, PageSignature,
/// };
///
/// let router = InterstitialRouter::with_defaults();
/// let sig = PageSignature::new("https://example.com/redirect", Some(302));
/// let decision = router.route(&sig, InterstitialKind::Transient);
/// assert!(!decision.is_classified());
/// assert!(decision.is_retryable());
/// ```
#[derive(Debug, Clone)]
pub struct InterstitialRouter {
    /// Classifier consulted by `classify_and_route` to turn a
    /// signature into an [`InterstitialKind`].
    classifier: InterstitialClassifier,
    /// Policy supplying the intervals, budgets, escalation target and
    /// short-circuit flag read when building routes.
    policy: InterstitialPolicy,
}
64
65impl Default for InterstitialRouter {
66    fn default() -> Self {
67        Self::with_defaults()
68    }
69}
70
71impl InterstitialRouter {
72    /// Build a router with the supplied policy.
73    #[must_use]
74    pub const fn new(policy: InterstitialPolicy) -> Self {
75        Self {
76            classifier: InterstitialClassifier::new(),
77            policy,
78        }
79    }
80
81    /// Build a router with the default policy.
82    #[must_use]
83    pub fn with_defaults() -> Self {
84        Self::new(InterstitialPolicy::default())
85    }
86
87    /// Borrow the configured policy.
88    #[must_use]
89    pub const fn policy(&self) -> &InterstitialPolicy {
90        &self.policy
91    }
92
93    /// Replace the policy.
94    #[must_use]
95    pub const fn with_policy(mut self, policy: InterstitialPolicy) -> Self {
96        self.policy = policy;
97        self
98    }
99
100    /// Classify `signature` via the router's classifier
101    /// and route the result. This is the one-shot helper
102    /// most callers want.
103    ///
104    /// # Example
105    ///
106    /// ```
107    /// use stygian_browser::interstitial_router::{
108    ///     InterstitialKind, InterstitialRouter, PageSignature,
109    /// };
110    ///
111    /// let router = InterstitialRouter::with_defaults();
112    /// let sig = PageSignature::new("https://example.com", Some(302));
113    /// let decision = router.classify_and_route(&sig);
114    /// assert_eq!(decision.kind(), InterstitialKind::Transient);
115    /// ```
116    #[must_use]
117    pub fn classify_and_route(&self, signature: &PageSignature) -> RouterDecision {
118        let kind = self.classifier.classify(signature);
119        self.route(signature, kind)
120    }
121
122    /// Route a pre-classified signature. The decision is
123    /// built from the supplied `kind` plus the
124    /// signature's evidence.
125    ///
126    /// # Example
127    ///
128    /// ```
129    /// use stygian_browser::interstitial_router::{
130    ///     InterstitialKind, InterstitialRouter, InterstitialSeverity, PageSignature,
131    /// };
132    ///
133    /// let router = InterstitialRouter::with_defaults();
134    /// let sig = PageSignature::new("https://example.com", Some(202))
135    ///     .with_body_marker("please wait");
136    /// let decision = router.route(&sig, InterstitialKind::Queue);
137    /// assert_eq!(decision.severity(), InterstitialSeverity::Retryable);
138    /// ```
139    #[must_use]
140    pub fn route(&self, signature: &PageSignature, kind: InterstitialKind) -> RouterDecision {
141        let evidence = build_evidence(signature, kind);
142        let route = self.build_route(signature, kind);
143        let reason = build_reason(signature, kind);
144        RouterDecision::new(kind, route, reason, evidence)
145    }
146
147    fn build_route(&self, signature: &PageSignature, kind: InterstitialKind) -> InterstitialRoute {
148        match kind {
149            InterstitialKind::Queue => InterstitialRoute::WaitAndRetry {
150                interval: self.policy.queue_interval,
151                max_retries: self.policy.queue_max_retries,
152                queue_position: signature.queue_position_hint,
153            },
154            InterstitialKind::Challenge => InterstitialRoute::ChallengeSolve {
155                vendor_hint: signature.vendor_hint.clone(),
156                allowed_strategies: allowed_strategies_for_challenge(),
157                solve_budget: self.policy.challenge_solve_budget,
158            },
159            InterstitialKind::HardBlock => InterstitialRoute::HardBlock {
160                escalate_to: self.policy.hard_block_escalation,
161                rotate_session: true,
162                refresh_sticky: true,
163            },
164            InterstitialKind::Transient => InterstitialRoute::Transient {
165                follow_redirect: self.policy.transient_follow_redirect,
166                max_hops: self.policy.max_transient_hops,
167            },
168        }
169    }
170
171    /// `true` when the router's policy
172    /// (`short_circuit_on_classified`) is set and the
173    /// `kind` is a classified (non-`Transient`) decision.
174    /// The runner calls this helper to decide whether to
175    /// short-circuit on the decision.
176    #[must_use]
177    pub const fn should_short_circuit(&self, kind: InterstitialKind) -> bool {
178        self.policy.short_circuit_on_classified && !matches!(kind, InterstitialKind::Transient)
179    }
180}
181
182/// One-shot helper: classify + route via a default
183/// router. Convenience for tests and call sites that
184/// don't need to customise the policy.
185///
186/// # Example
187///
188/// ```
189/// use stygian_browser::interstitial_router::{
190///     classify_and_route, InterstitialKind, PageSignature,
191/// };
192///
193/// let sig = PageSignature::new("https://example.com/cdn-cgi/challenge-platform/h/b", Some(403))
194///     .with_body_marker("cf-chl-bypass");
195/// let decision = classify_and_route(&sig);
196/// assert_eq!(decision.kind(), InterstitialKind::Challenge);
197/// ```
198#[must_use]
199pub fn classify_and_route(signature: &PageSignature) -> RouterDecision {
200    InterstitialRouter::with_defaults().classify_and_route(signature)
201}
202
203/// One-shot helper: route a pre-classified signature via
204/// a default router.
205#[must_use]
206pub fn route(signature: &PageSignature, kind: InterstitialKind) -> RouterDecision {
207    InterstitialRouter::with_defaults().route(signature, kind)
208}
209
210fn allowed_strategies_for_challenge() -> Vec<StrategyUsed> {
211    vec![
212        StrategyUsed::BrowserLightStealth,
213        StrategyUsed::StickyProxyBrowserSession,
214    ]
215}
216
217fn build_evidence(signature: &PageSignature, kind: InterstitialKind) -> PageSignatureEvidence {
218    let host = signature.host();
219    let matched_url_patterns = match kind {
220        InterstitialKind::HardBlock => {
221            url_pattern_matches(signature, super::classifier::HARD_BLOCK_URL_PATTERNS_PUBLIC)
222        }
223        InterstitialKind::Challenge => {
224            url_pattern_matches(signature, super::classifier::CHALLENGE_URL_PATTERNS_PUBLIC)
225        }
226        InterstitialKind::Queue => {
227            url_pattern_matches(signature, super::classifier::QUEUE_URL_PATTERNS_PUBLIC)
228        }
229        InterstitialKind::Transient => Vec::new(),
230    };
231    let matched_body_markers = body_marker_matches(signature, kind);
232    let matched_headers = match kind {
233        InterstitialKind::Challenge => {
234            header_matches(signature, super::classifier::CHALLENGE_HEADERS_PUBLIC)
235        }
236        _ => Vec::new(),
237    };
238    PageSignatureEvidence {
239        host,
240        status_code: signature.status_code,
241        matched_url_patterns,
242        matched_body_markers,
243        matched_headers,
244        queue_position: signature.queue_position_hint,
245        vendor_hint: signature.vendor_hint.clone(),
246    }
247}
248
249fn url_pattern_matches(signature: &PageSignature, patterns: &[&str]) -> Vec<String> {
250    patterns
251        .iter()
252        .filter(|p| signature.url_contains(p))
253        .map(|p| (*p).to_string())
254        .collect()
255}
256
257fn body_marker_matches(signature: &PageSignature, kind: InterstitialKind) -> Vec<String> {
258    let catalog: &[&str] = match kind {
259        InterstitialKind::HardBlock => super::classifier::HARD_BLOCK_BODY_MARKERS_PUBLIC,
260        InterstitialKind::Challenge => super::classifier::CHALLENGE_BODY_MARKERS_PUBLIC,
261        InterstitialKind::Queue => super::classifier::QUEUE_BODY_MARKERS_PUBLIC,
262        InterstitialKind::Transient => &[],
263    };
264    catalog
265        .iter()
266        .filter(|m| signature.body_contains(m))
267        .map(|m| (*m).to_string())
268        .collect()
269}
270
271fn header_matches(signature: &PageSignature, names: &[&str]) -> Vec<String> {
272    names
273        .iter()
274        .filter(|n| signature.has_header(n))
275        .map(|n| (*n).to_string())
276        .collect()
277}
278
279fn build_reason(signature: &PageSignature, kind: InterstitialKind) -> String {
280    let host = signature.host().unwrap_or_else(|| "<unknown>".to_string());
281    match kind {
282        InterstitialKind::Queue => format!(
283            "queue page observed on {host} (url={})",
284            truncate_url(&signature.url)
285        ),
286        InterstitialKind::Challenge => format!(
287            "challenge interstitial observed on {host} (url={})",
288            truncate_url(&signature.url)
289        ),
290        InterstitialKind::HardBlock => format!(
291            "hard block observed on {host} (url={})",
292            truncate_url(&signature.url)
293        ),
294        InterstitialKind::Transient => format!(
295            "transient redirect observed on {host} (url={})",
296            truncate_url(&signature.url)
297        ),
298    }
299}
300
/// Shorten `url` for inclusion in a reason string.
///
/// URLs of at most 128 bytes are returned unchanged. Longer URLs are
/// cut to at most 128 bytes and suffixed with `…`.
///
/// The cut is moved back to the nearest UTF-8 character boundary, so a
/// URL containing multi-byte characters never triggers a slicing
/// panic; in that case the kept prefix may be slightly shorter than
/// 128 bytes.
fn truncate_url(url: &str) -> String {
    const MAX: usize = 128;
    if url.len() <= MAX {
        return url.to_string();
    }
    // Slicing a `str` at a byte offset that falls inside a multi-byte
    // character panics. Index 0 is always a boundary, so this loop
    // terminates.
    let mut cut = MAX;
    while !url.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}…", &url[..cut])
}
309
310// ─── Tests ────────────────────────────────────────────────────────────────────
311
#[cfg(test)]
// Test code is allowed to unwrap, panic and index directly: a failure
// here should abort the test rather than be handled.
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::super::policy::{
        DEFAULT_HARD_BLOCK_ESCALATION, DEFAULT_MAX_TRANSIENT_HOPS, DEFAULT_QUEUE_MAX_RETRIES,
        DEFAULT_TRANSIENT_FOLLOW_REDIRECT, InterstitialSeverity,
    };
    use super::*;

    /// A `Queue` kind yields a retryable `WaitAndRetry` route carrying
    /// the default retry cap and the signature's queue position.
    #[test]
    fn route_returns_wait_and_retry_for_queue() {
        let router = InterstitialRouter::with_defaults();
        let sig = PageSignature::new("https://example.com/queue", Some(200))
            .with_body_marker("please wait")
            .with_queue_position(5);
        let decision = router.route(&sig, InterstitialKind::Queue);
        assert_eq!(decision.kind(), InterstitialKind::Queue);
        assert_eq!(decision.severity(), InterstitialSeverity::Retryable);
        match decision.route() {
            InterstitialRoute::WaitAndRetry {
                max_retries,
                queue_position,
                ..
            } => {
                assert_eq!(*max_retries, DEFAULT_QUEUE_MAX_RETRIES);
                assert_eq!(*queue_position, Some(5));
            }
            other => panic!("expected WaitAndRetry, got {other:?}"),
        }
    }

    /// A `Challenge` kind yields a `ChallengeSolve` route that forwards
    /// the signature's vendor hint and allows the sticky-proxy browser
    /// session strategy.
    #[test]
    fn route_returns_challenge_solve_for_challenge() {
        let router = InterstitialRouter::with_defaults();
        let sig = PageSignature::new(
            "https://example.com/cdn-cgi/challenge-platform/h/b",
            Some(403),
        )
        .with_body_marker("cf-chl-bypass")
        .with_vendor_hint("cloudflare");
        let decision = router.route(&sig, InterstitialKind::Challenge);
        assert_eq!(decision.kind(), InterstitialKind::Challenge);
        assert_eq!(decision.severity(), InterstitialSeverity::RequiresSolve);
        match decision.route() {
            InterstitialRoute::ChallengeSolve {
                vendor_hint,
                allowed_strategies,
                ..
            } => {
                assert_eq!(vendor_hint.as_deref(), Some("cloudflare"));
                assert!(allowed_strategies.contains(&StrategyUsed::StickyProxyBrowserSession));
            }
            other => panic!("expected ChallengeSolve, got {other:?}"),
        }
    }

    /// A `HardBlock` kind yields a terminal `HardBlock` route with the
    /// default escalation target and both session-rotation flags set.
    #[test]
    fn route_returns_hard_block_strategy_for_hardblock() {
        let router = InterstitialRouter::with_defaults();
        let sig = PageSignature::new("https://example.com/blocked", Some(403))
            .with_body_marker("access denied");
        let decision = router.route(&sig, InterstitialKind::HardBlock);
        assert_eq!(decision.kind(), InterstitialKind::HardBlock);
        assert_eq!(decision.severity(), InterstitialSeverity::Terminal);
        assert!(decision.is_terminal());
        match decision.route() {
            InterstitialRoute::HardBlock {
                escalate_to,
                rotate_session,
                refresh_sticky,
            } => {
                assert_eq!(*escalate_to, DEFAULT_HARD_BLOCK_ESCALATION);
                assert!(*rotate_session);
                assert!(*refresh_sticky);
            }
            other => panic!("expected HardBlock, got {other:?}"),
        }
    }

    /// A `Transient` kind yields a retryable `Transient` route using
    /// the default redirect-following and hop-limit settings.
    #[test]
    fn route_returns_transient_strategy_for_transient() {
        let router = InterstitialRouter::with_defaults();
        let sig = PageSignature::new("https://example.com/redirect", Some(302));
        let decision = router.route(&sig, InterstitialKind::Transient);
        assert_eq!(decision.kind(), InterstitialKind::Transient);
        assert_eq!(decision.severity(), InterstitialSeverity::Retryable);
        match decision.route() {
            InterstitialRoute::Transient {
                follow_redirect,
                max_hops,
            } => {
                assert_eq!(*follow_redirect, DEFAULT_TRANSIENT_FOLLOW_REDIRECT);
                assert_eq!(*max_hops, DEFAULT_MAX_TRANSIENT_HOPS);
            }
            other => panic!("expected Transient, got {other:?}"),
        }
    }

    /// With the default policy every kind except `Transient`
    /// short-circuits; clearing `short_circuit_on_classified` disables
    /// short-circuiting even for `HardBlock`.
    #[test]
    fn should_short_circuit_skips_transient() {
        let router = InterstitialRouter::with_defaults();
        assert!(router.should_short_circuit(InterstitialKind::Queue));
        assert!(router.should_short_circuit(InterstitialKind::Challenge));
        assert!(router.should_short_circuit(InterstitialKind::HardBlock));
        assert!(!router.should_short_circuit(InterstitialKind::Transient));

        let lenient = InterstitialRouter::with_defaults().with_policy(InterstitialPolicy {
            short_circuit_on_classified: false,
            ..InterstitialPolicy::default()
        });
        assert!(!lenient.should_short_circuit(InterstitialKind::HardBlock));
    }

    /// Two independently built but identical signatures must produce
    /// equal decisions, both as a whole and field by field.
    #[test]
    fn determinism_identical_signatures_yield_identical_decisions() {
        let router = InterstitialRouter::with_defaults();
        let sig_a = PageSignature::new("https://example.com/blocked", Some(403))
            .with_body_marker("access denied")
            .with_vendor_hint("cloudflare");
        let sig_b = PageSignature::new("https://example.com/blocked", Some(403))
            .with_body_marker("access denied")
            .with_vendor_hint("cloudflare");
        let decision_a = router.classify_and_route(&sig_a);
        let decision_b = router.classify_and_route(&sig_b);
        assert_eq!(decision_a, decision_b);
        // Same kind, severity, route, and reason.
        assert_eq!(decision_a.kind(), decision_b.kind());
        assert_eq!(decision_a.severity(), decision_b.severity());
        assert_eq!(decision_a.route(), decision_b.route());
        assert_eq!(decision_a.reason(), decision_b.reason());
    }

    /// End-to-end (classify + route): a queue page and a hard block
    /// must be told apart by severity and by the retryable/terminal
    /// predicates.
    #[test]
    fn observability_distinguishes_queue_from_hard_block() {
        let router = InterstitialRouter::with_defaults();
        let queue_sig = PageSignature::new("https://example.com/queue", Some(200))
            .with_body_marker("please wait");
        let hard_block_sig = PageSignature::new("https://example.com/blocked", Some(403))
            .with_body_marker("access denied");
        let queue_decision = router.classify_and_route(&queue_sig);
        let hard_block_decision = router.classify_and_route(&hard_block_sig);
        // The dedicated severity field must distinguish them.
        assert_eq!(queue_decision.severity(), InterstitialSeverity::Retryable);
        assert_eq!(
            hard_block_decision.severity(),
            InterstitialSeverity::Terminal
        );
        assert!(queue_decision.is_retryable());
        assert!(hard_block_decision.is_terminal());
        assert!(!queue_decision.is_terminal());
        assert!(!hard_block_decision.is_retryable());
    }
}
469}