Skip to main content

stygian_browser/interstitial_router/
policy.rs

//! Interstitial routing policy and route schema.
//!
//! Defines the [`InterstitialPolicy`] (queue / challenge /
//! hard-block / transient tunables) and the
//! [`InterstitialRoute`] enum (the dedicated strategy per
//! [`InterstitialKind`]) plus the [`InterstitialSeverity`]
//! tier (the observability discriminator that tells
//! downstream tooling whether the classified state is
//! retryable, requires solving, or is terminal).
//!
//! ## Severity tier vs classification kind
//!
//! The severity tier is a **dedicated field** on
//! [`RouterDecision`][super::RouterDecision] (see
//! [`RouterDecision::severity`][super::RouterDecision::severity])
//! that groups [`InterstitialKind`]s by their
//! **operational** meaning rather than their structural
//! classification. Observability tooling can therefore
//! distinguish "queue (retryable wait)" from "hard block
//! (terminal escalation)" by reading the dedicated severity
//! field without branching on the kind enum.
22
23use std::fmt;
24use std::time::Duration;
25
26use serde::{Deserialize, Serialize};
27
28use crate::acquisition::StrategyUsed;
29
30/// Default wait interval between queue retries.
31///
32/// 5 seconds is a safe "polite" default for the
33/// "Please wait..." / queue-position interstitials. The
34/// caller can shorten this via
35/// [`InterstitialPolicy::with_queue_interval`].
36pub const DEFAULT_QUEUE_INTERVAL_MS: u64 = 5_000;
37
38/// Default maximum retries for a queue page.
39///
40/// Three retries matches the documented
41/// "wait, retry, escalate" cadence. The caller can override
42/// via [`InterstitialPolicy::with_queue_max_retries`].
43pub const DEFAULT_QUEUE_MAX_RETRIES: u32 = 3;
44
45/// Default challenge solve budget.
46///
47/// 30 seconds is enough to solve a captcha / turnstile
48/// challenge via the [`StrategyUsed::StickyProxyBrowserSession`]
49/// stage. The caller can override via
50/// [`InterstitialPolicy::with_challenge_solve_budget`].
51pub const DEFAULT_CHALLENGE_SOLVE_BUDGET_MS: u64 = 30_000;
52
53/// Default strategy to escalate to on a hard block.
54///
55/// Browser + sticky is the most expensive strategy, so the
56/// default escalation is conservative — the caller is
57/// expected to use the `Escalate` route as a last-resort
58/// signal rather than a routine retry path.
59pub const DEFAULT_HARD_BLOCK_ESCALATION: StrategyUsed = StrategyUsed::StickyProxyBrowserSession;
60
61/// Default follow-redirect flag for transient pages.
62pub const DEFAULT_TRANSIENT_FOLLOW_REDIRECT: bool = true;
63
64/// Default max redirect hops to follow on a transient page.
65pub const DEFAULT_MAX_TRANSIENT_HOPS: u32 = 3;
66
67/// Classification kind for an interstitial page.
68///
69/// One of four shapes the classifier emits. The
70/// [`InterstitialRouter`][super::InterstitialRouter] maps
71/// each kind to a dedicated [`InterstitialRoute`].
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
73#[serde(rename_all = "snake_case")]
74pub enum InterstitialKind {
75    /// "Please wait..." / waiting-room page. Retryable.
76    Queue,
77    /// Vendor-issued challenge (captcha, turnstile, `PoW`).
78    Challenge,
79    /// Terminal vendor block page.
80    HardBlock,
81    /// Bounded 3xx redirect chain that should be followed
82    /// before classifying the response.
83    Transient,
84}
85
86impl InterstitialKind {
87    /// Stable `snake_case` label used in telemetry output.
88    #[must_use]
89    pub const fn label(self) -> &'static str {
90        match self {
91            Self::Queue => "queue",
92            Self::Challenge => "challenge",
93            Self::HardBlock => "hard_block",
94            Self::Transient => "transient",
95        }
96    }
97
98    /// `true` when the kind is [`Self::Queue`].
99    #[must_use]
100    pub const fn is_queue(self) -> bool {
101        matches!(self, Self::Queue)
102    }
103
104    /// `true` when the kind is [`Self::HardBlock`].
105    #[must_use]
106    pub const fn is_hard_block(self) -> bool {
107        matches!(self, Self::HardBlock)
108    }
109
110    /// `true` when the kind is [`Self::Challenge`].
111    #[must_use]
112    pub const fn is_challenge(self) -> bool {
113        matches!(self, Self::Challenge)
114    }
115
116    /// `true` when the kind is [`Self::Transient`].
117    #[must_use]
118    pub const fn is_transient(self) -> bool {
119        matches!(self, Self::Transient)
120    }
121}
122
123impl fmt::Display for InterstitialKind {
124    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
125        f.write_str(self.label())
126    }
127}
128
129/// Operational severity tier for an interstitial decision.
130///
131/// The tier is a **dedicated** observability field
132/// ([`RouterDecision::severity`][super::RouterDecision::severity])
133/// that groups [`InterstitialKind`]s by their operational
134/// meaning. It is intentionally a separate enum from
135/// [`InterstitialKind`] so downstream tooling can branch on
136/// "retryable vs terminal" without re-deriving it from the
137/// kind.
138///
139/// | Kind | Severity |
140/// |---|---|
141/// | [`Queue`][InterstitialKind::Queue] | [`Retryable`][Self::Retryable] |
142/// | [`Transient`][InterstitialKind::Transient] | [`Retryable`][Self::Retryable] |
143/// | [`Challenge`][InterstitialKind::Challenge] | [`RequiresSolve`][Self::RequiresSolve] |
144/// | [`HardBlock`][InterstitialKind::HardBlock] | [`Terminal`][Self::Terminal] |
145#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
146#[serde(rename_all = "snake_case")]
147pub enum InterstitialSeverity {
148    /// The classified page is a wait / retry path. The
149    /// runner may short-circuit and let the calling layer
150    /// wait + retry.
151    Retryable,
152    /// The classified page is a vendor challenge that
153    /// requires solving before the target document can be
154    /// returned.
155    RequiresSolve,
156    /// The classified page is a terminal vendor block. The
157    /// runner should escalate (rotate session, refresh
158    /// sticky context, switch to the strongest strategy).
159    Terminal,
160}
161
162impl InterstitialSeverity {
163    /// Stable `snake_case` label.
164    #[must_use]
165    pub const fn label(self) -> &'static str {
166        match self {
167            Self::Retryable => "retryable",
168            Self::RequiresSolve => "requires_solve",
169            Self::Terminal => "terminal",
170        }
171    }
172
173    /// Map a classification kind to its severity tier.
174    #[must_use]
175    pub const fn for_kind(kind: InterstitialKind) -> Self {
176        match kind {
177            InterstitialKind::Queue | InterstitialKind::Transient => Self::Retryable,
178            InterstitialKind::Challenge => Self::RequiresSolve,
179            InterstitialKind::HardBlock => Self::Terminal,
180        }
181    }
182}
183
184impl fmt::Display for InterstitialSeverity {
185    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
186        f.write_str(self.label())
187    }
188}
189
190/// Dedicated acquisition route per [`InterstitialKind`].
191///
192/// Each variant carries the per-kind tunables. The route is
193/// purely declarative — the actual acquisition ladder
194/// adjustment is done by the calling layer (or by the
195/// runner, when
196/// [`InterstitialPolicy::short_circuit_on_classified`] is
197/// `true`).
198#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
199#[serde(rename_all = "snake_case", tag = "route")]
200pub enum InterstitialRoute {
201    /// Wait `interval` for up to `max_retries` attempts.
202    /// Carries the optional `queue_position` (e.g. "you are
203    /// #5 in line") so the caller can scale the wait by the
204    /// position.
205    WaitAndRetry {
206        /// Wait interval between retries.
207        #[serde(with = "duration_ms")]
208        interval: Duration,
209        /// Maximum retry attempts.
210        max_retries: u32,
211        /// Optional queue position hint extracted from the
212        /// page (1-based, where 1 = first in line).
213        queue_position: Option<u32>,
214    },
215    /// Escalate to a challenge-solving strategy with the
216    /// given `solve_budget`. The optional `vendor_hint`
217    /// narrows the strategy (e.g. `cloudflare`,
218    /// `perimeterx`).
219    ChallengeSolve {
220        /// Optional vendor hint extracted from the page
221        /// markers (e.g. `cloudflare`, `akamai`).
222        vendor_hint: Option<String>,
223        /// Strategies the caller may attempt.
224        allowed_strategies: Vec<StrategyUsed>,
225        /// Maximum wall-clock budget for the solve.
226        #[serde(with = "duration_ms")]
227        solve_budget: Duration,
228    },
229    /// Terminal vendor block. Escalate to
230    /// `escalate_to`, optionally rotate the proxy session,
231    /// and optionally invalidate the sticky pool context.
232    HardBlock {
233        /// Strategy to escalate to.
234        escalate_to: StrategyUsed,
235        /// Whether to rotate the proxy session.
236        rotate_session: bool,
237        /// Whether to invalidate the sticky pool context.
238        refresh_sticky: bool,
239    },
240    /// Follow up to `max_hops` redirect hops and then
241    /// re-classify the result.
242    Transient {
243        /// Whether to follow the redirect.
244        follow_redirect: bool,
245        /// Maximum redirect hops to follow.
246        max_hops: u32,
247    },
248}
249
250impl InterstitialRoute {
251    /// Stable `snake_case` route name.
252    #[must_use]
253    pub const fn label(&self) -> &'static str {
254        match self {
255            Self::WaitAndRetry { .. } => "wait_and_retry",
256            Self::ChallengeSolve { .. } => "challenge_solve",
257            Self::HardBlock { .. } => "hard_block",
258            Self::Transient { .. } => "transient",
259        }
260    }
261}
262
263/// Routing tunables for [`InterstitialRouter`][super::InterstitialRouter].
264///
265/// All fields have safe defaults that the production
266/// acquisition path uses unchanged. Callers can override
267/// any field via the `with_*` builders.
268///
269/// # Example
270///
271/// ```
272/// use stygian_browser::interstitial_router::InterstitialPolicy;
273/// use std::time::Duration;
274///
275/// let policy = InterstitialPolicy {
276///     queue_interval: Duration::from_secs(10),
277///     ..InterstitialPolicy::default()
278/// };
279/// assert_eq!(policy.queue_interval, Duration::from_secs(10));
280/// assert!(policy.short_circuit_on_classified);
281/// ```
282#[derive(Debug, Clone, Serialize, Deserialize)]
283pub struct InterstitialPolicy {
284    /// Wait interval between queue retries.
285    #[serde(with = "duration_ms")]
286    pub queue_interval: Duration,
287    /// Maximum retries for a queue page.
288    pub queue_max_retries: u32,
289    /// Challenge solve budget.
290    #[serde(with = "duration_ms")]
291    pub challenge_solve_budget: Duration,
292    /// Strategy to escalate to on a hard block.
293    pub hard_block_escalation: StrategyUsed,
294    /// Whether to follow redirects on a transient page.
295    pub transient_follow_redirect: bool,
296    /// Maximum redirect hops to follow on a transient page.
297    pub max_transient_hops: u32,
298    /// When `true` (default), a non-`Transient`
299    /// classification short-circuits the runner with a
300    /// structured
301    /// [`StageFailureKind::InterstitialRouted`][crate::acquisition::StageFailureKind::InterstitialRouted]
302    /// failure so the calling layer can route via the
303    /// dedicated strategy. When `false`, the decision is
304    /// only attached to the result — the runner still
305    /// executes the strategy ladder.
306    pub short_circuit_on_classified: bool,
307}
308
309impl Default for InterstitialPolicy {
310    fn default() -> Self {
311        Self {
312            queue_interval: Duration::from_millis(DEFAULT_QUEUE_INTERVAL_MS),
313            queue_max_retries: DEFAULT_QUEUE_MAX_RETRIES,
314            challenge_solve_budget: Duration::from_millis(DEFAULT_CHALLENGE_SOLVE_BUDGET_MS),
315            hard_block_escalation: DEFAULT_HARD_BLOCK_ESCALATION,
316            transient_follow_redirect: DEFAULT_TRANSIENT_FOLLOW_REDIRECT,
317            max_transient_hops: DEFAULT_MAX_TRANSIENT_HOPS,
318            short_circuit_on_classified: true,
319        }
320    }
321}
322
323impl InterstitialPolicy {
324    /// Build a policy with an explicit queue interval.
325    #[must_use]
326    pub const fn with_queue_interval(mut self, interval: Duration) -> Self {
327        self.queue_interval = interval;
328        self
329    }
330
331    /// Build a policy with an explicit max retries value.
332    #[must_use]
333    pub const fn with_queue_max_retries(mut self, max_retries: u32) -> Self {
334        self.queue_max_retries = max_retries;
335        self
336    }
337
338    /// Build a policy with an explicit challenge solve
339    /// budget.
340    #[must_use]
341    pub const fn with_challenge_solve_budget(mut self, budget: Duration) -> Self {
342        self.challenge_solve_budget = budget;
343        self
344    }
345
346    /// Build a policy with an explicit hard-block
347    /// escalation strategy.
348    #[must_use]
349    pub const fn with_hard_block_escalation(mut self, strategy: StrategyUsed) -> Self {
350        self.hard_block_escalation = strategy;
351        self
352    }
353
354    /// Build a policy with an explicit follow-redirect flag.
355    #[must_use]
356    pub const fn with_transient_follow_redirect(mut self, follow: bool) -> Self {
357        self.transient_follow_redirect = follow;
358        self
359    }
360
361    /// Build a policy with an explicit max-hops value.
362    #[must_use]
363    pub const fn with_max_transient_hops(mut self, max_hops: u32) -> Self {
364        self.max_transient_hops = max_hops;
365        self
366    }
367
368    /// Build a policy with an explicit short-circuit flag.
369    #[must_use]
370    pub const fn with_short_circuit(mut self, short_circuit: bool) -> Self {
371        self.short_circuit_on_classified = short_circuit;
372        self
373    }
374}
375
376/// serde helper: serialise [`Duration`] as integer
377/// milliseconds.
378mod duration_ms {
379    use std::time::Duration;
380
381    use serde::{Deserialize, Deserializer, Serializer};
382
383    #[allow(clippy::cast_possible_truncation)]
384    pub fn serialize<S: Serializer>(value: &Duration, ser: S) -> Result<S::Ok, S::Error> {
385        let ms = value.as_millis();
386        let n = if ms > u128::from(u64::MAX) {
387            u64::MAX
388        } else {
389            ms as u64
390        };
391        ser.serialize_u64(n)
392    }
393
394    pub fn deserialize<'de, D: Deserializer<'de>>(de: D) -> Result<Duration, D::Error> {
395        let ms = u64::deserialize(de)?;
396        Ok(Duration::from_millis(ms))
397    }
398}