stygian_browser/interstitial_router/policy.rs
1//! Interstitial routing policy and route schema.
2//!
3//! Defines the [`InterstitialPolicy`] (queue / challenge /
4//! hard-block / transient tunables) and the
5//! [`InterstitialRoute`] enum (the dedicated strategy per
6//! [`InterstitialKind`]) plus the [`InterstitialSeverity`]
7//! tier (the observability discriminator that tells
8//! downstream tooling whether the classified state is
9//! retryable, requires solving, or is terminal).
10//!
11//! ## Severity tier vs classification kind
12//!
//! The severity tier is a **dedicated field** on
//! [`RouterDecision`][super::RouterDecision] (see
//! [`RouterDecision::severity`][super::RouterDecision::severity])
//! that groups [`InterstitialKind`]s by their
//! **operational** meaning rather than their structural
//! classification. Observability tooling can therefore
//! distinguish "queue (retryable wait)" from "hard block
//! (terminal escalation)" by reading the dedicated severity
//! field without branching on the kind enum.
22
23use std::fmt;
24use std::time::Duration;
25
26use serde::{Deserialize, Serialize};
27
28use crate::acquisition::StrategyUsed;
29
30/// Default wait interval between queue retries.
31///
32/// 5 seconds is a safe "polite" default for the
33/// "Please wait..." / queue-position interstitials. The
34/// caller can shorten this via
35/// [`InterstitialPolicy::with_queue_interval`].
36pub const DEFAULT_QUEUE_INTERVAL_MS: u64 = 5_000;
37
38/// Default maximum retries for a queue page.
39///
40/// Three retries matches the documented
41/// "wait, retry, escalate" cadence. The caller can override
42/// via [`InterstitialPolicy::with_queue_max_retries`].
43pub const DEFAULT_QUEUE_MAX_RETRIES: u32 = 3;
44
45/// Default challenge solve budget.
46///
47/// 30 seconds is enough to solve a captcha / turnstile
48/// challenge via the [`StrategyUsed::StickyProxyBrowserSession`]
49/// stage. The caller can override via
50/// [`InterstitialPolicy::with_challenge_solve_budget`].
51pub const DEFAULT_CHALLENGE_SOLVE_BUDGET_MS: u64 = 30_000;
52
53/// Default strategy to escalate to on a hard block.
54///
55/// Browser + sticky is the most expensive strategy, so the
56/// default escalation is conservative — the caller is
57/// expected to use the `Escalate` route as a last-resort
58/// signal rather than a routine retry path.
59pub const DEFAULT_HARD_BLOCK_ESCALATION: StrategyUsed = StrategyUsed::StickyProxyBrowserSession;
60
61/// Default follow-redirect flag for transient pages.
62pub const DEFAULT_TRANSIENT_FOLLOW_REDIRECT: bool = true;
63
64/// Default max redirect hops to follow on a transient page.
65pub const DEFAULT_MAX_TRANSIENT_HOPS: u32 = 3;
66
67/// Classification kind for an interstitial page.
68///
69/// One of four shapes the classifier emits. The
70/// [`InterstitialRouter`][super::InterstitialRouter] maps
71/// each kind to a dedicated [`InterstitialRoute`].
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
73#[serde(rename_all = "snake_case")]
74pub enum InterstitialKind {
75 /// "Please wait..." / waiting-room page. Retryable.
76 Queue,
77 /// Vendor-issued challenge (captcha, turnstile, `PoW`).
78 Challenge,
79 /// Terminal vendor block page.
80 HardBlock,
81 /// Bounded 3xx redirect chain that should be followed
82 /// before classifying the response.
83 Transient,
84}
85
86impl InterstitialKind {
87 /// Stable `snake_case` label used in telemetry output.
88 #[must_use]
89 pub const fn label(self) -> &'static str {
90 match self {
91 Self::Queue => "queue",
92 Self::Challenge => "challenge",
93 Self::HardBlock => "hard_block",
94 Self::Transient => "transient",
95 }
96 }
97
98 /// `true` when the kind is [`Self::Queue`].
99 #[must_use]
100 pub const fn is_queue(self) -> bool {
101 matches!(self, Self::Queue)
102 }
103
104 /// `true` when the kind is [`Self::HardBlock`].
105 #[must_use]
106 pub const fn is_hard_block(self) -> bool {
107 matches!(self, Self::HardBlock)
108 }
109
110 /// `true` when the kind is [`Self::Challenge`].
111 #[must_use]
112 pub const fn is_challenge(self) -> bool {
113 matches!(self, Self::Challenge)
114 }
115
116 /// `true` when the kind is [`Self::Transient`].
117 #[must_use]
118 pub const fn is_transient(self) -> bool {
119 matches!(self, Self::Transient)
120 }
121}
122
123impl fmt::Display for InterstitialKind {
124 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
125 f.write_str(self.label())
126 }
127}
128
129/// Operational severity tier for an interstitial decision.
130///
131/// The tier is a **dedicated** observability field
132/// ([`RouterDecision::severity`][super::RouterDecision::severity])
133/// that groups [`InterstitialKind`]s by their operational
134/// meaning. It is intentionally a separate enum from
135/// [`InterstitialKind`] so downstream tooling can branch on
136/// "retryable vs terminal" without re-deriving it from the
137/// kind.
138///
139/// | Kind | Severity |
140/// |---|---|
141/// | [`Queue`][InterstitialKind::Queue] | [`Retryable`][Self::Retryable] |
142/// | [`Transient`][InterstitialKind::Transient] | [`Retryable`][Self::Retryable] |
143/// | [`Challenge`][InterstitialKind::Challenge] | [`RequiresSolve`][Self::RequiresSolve] |
144/// | [`HardBlock`][InterstitialKind::HardBlock] | [`Terminal`][Self::Terminal] |
145#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
146#[serde(rename_all = "snake_case")]
147pub enum InterstitialSeverity {
148 /// The classified page is a wait / retry path. The
149 /// runner may short-circuit and let the calling layer
150 /// wait + retry.
151 Retryable,
152 /// The classified page is a vendor challenge that
153 /// requires solving before the target document can be
154 /// returned.
155 RequiresSolve,
156 /// The classified page is a terminal vendor block. The
157 /// runner should escalate (rotate session, refresh
158 /// sticky context, switch to the strongest strategy).
159 Terminal,
160}
161
162impl InterstitialSeverity {
163 /// Stable `snake_case` label.
164 #[must_use]
165 pub const fn label(self) -> &'static str {
166 match self {
167 Self::Retryable => "retryable",
168 Self::RequiresSolve => "requires_solve",
169 Self::Terminal => "terminal",
170 }
171 }
172
173 /// Map a classification kind to its severity tier.
174 #[must_use]
175 pub const fn for_kind(kind: InterstitialKind) -> Self {
176 match kind {
177 InterstitialKind::Queue | InterstitialKind::Transient => Self::Retryable,
178 InterstitialKind::Challenge => Self::RequiresSolve,
179 InterstitialKind::HardBlock => Self::Terminal,
180 }
181 }
182}
183
184impl fmt::Display for InterstitialSeverity {
185 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
186 f.write_str(self.label())
187 }
188}
189
190/// Dedicated acquisition route per [`InterstitialKind`].
191///
192/// Each variant carries the per-kind tunables. The route is
193/// purely declarative — the actual acquisition ladder
194/// adjustment is done by the calling layer (or by the
195/// runner, when
196/// [`InterstitialPolicy::short_circuit_on_classified`] is
197/// `true`).
198#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
199#[serde(rename_all = "snake_case", tag = "route")]
200pub enum InterstitialRoute {
201 /// Wait `interval` for up to `max_retries` attempts.
202 /// Carries the optional `queue_position` (e.g. "you are
203 /// #5 in line") so the caller can scale the wait by the
204 /// position.
205 WaitAndRetry {
206 /// Wait interval between retries.
207 #[serde(with = "duration_ms")]
208 interval: Duration,
209 /// Maximum retry attempts.
210 max_retries: u32,
211 /// Optional queue position hint extracted from the
212 /// page (1-based, where 1 = first in line).
213 queue_position: Option<u32>,
214 },
215 /// Escalate to a challenge-solving strategy with the
216 /// given `solve_budget`. The optional `vendor_hint`
217 /// narrows the strategy (e.g. `cloudflare`,
218 /// `perimeterx`).
219 ChallengeSolve {
220 /// Optional vendor hint extracted from the page
221 /// markers (e.g. `cloudflare`, `akamai`).
222 vendor_hint: Option<String>,
223 /// Strategies the caller may attempt.
224 allowed_strategies: Vec<StrategyUsed>,
225 /// Maximum wall-clock budget for the solve.
226 #[serde(with = "duration_ms")]
227 solve_budget: Duration,
228 },
229 /// Terminal vendor block. Escalate to
230 /// `escalate_to`, optionally rotate the proxy session,
231 /// and optionally invalidate the sticky pool context.
232 HardBlock {
233 /// Strategy to escalate to.
234 escalate_to: StrategyUsed,
235 /// Whether to rotate the proxy session.
236 rotate_session: bool,
237 /// Whether to invalidate the sticky pool context.
238 refresh_sticky: bool,
239 },
240 /// Follow up to `max_hops` redirect hops and then
241 /// re-classify the result.
242 Transient {
243 /// Whether to follow the redirect.
244 follow_redirect: bool,
245 /// Maximum redirect hops to follow.
246 max_hops: u32,
247 },
248}
249
250impl InterstitialRoute {
251 /// Stable `snake_case` route name.
252 #[must_use]
253 pub const fn label(&self) -> &'static str {
254 match self {
255 Self::WaitAndRetry { .. } => "wait_and_retry",
256 Self::ChallengeSolve { .. } => "challenge_solve",
257 Self::HardBlock { .. } => "hard_block",
258 Self::Transient { .. } => "transient",
259 }
260 }
261}
262
263/// Routing tunables for [`InterstitialRouter`][super::InterstitialRouter].
264///
265/// All fields have safe defaults that the production
266/// acquisition path uses unchanged. Callers can override
267/// any field via the `with_*` builders.
268///
269/// # Example
270///
271/// ```
272/// use stygian_browser::interstitial_router::InterstitialPolicy;
273/// use std::time::Duration;
274///
275/// let policy = InterstitialPolicy {
276/// queue_interval: Duration::from_secs(10),
277/// ..InterstitialPolicy::default()
278/// };
279/// assert_eq!(policy.queue_interval, Duration::from_secs(10));
280/// assert!(policy.short_circuit_on_classified);
281/// ```
282#[derive(Debug, Clone, Serialize, Deserialize)]
283pub struct InterstitialPolicy {
284 /// Wait interval between queue retries.
285 #[serde(with = "duration_ms")]
286 pub queue_interval: Duration,
287 /// Maximum retries for a queue page.
288 pub queue_max_retries: u32,
289 /// Challenge solve budget.
290 #[serde(with = "duration_ms")]
291 pub challenge_solve_budget: Duration,
292 /// Strategy to escalate to on a hard block.
293 pub hard_block_escalation: StrategyUsed,
294 /// Whether to follow redirects on a transient page.
295 pub transient_follow_redirect: bool,
296 /// Maximum redirect hops to follow on a transient page.
297 pub max_transient_hops: u32,
298 /// When `true` (default), a non-`Transient`
299 /// classification short-circuits the runner with a
300 /// structured
301 /// [`StageFailureKind::InterstitialRouted`][crate::acquisition::StageFailureKind::InterstitialRouted]
302 /// failure so the calling layer can route via the
303 /// dedicated strategy. When `false`, the decision is
304 /// only attached to the result — the runner still
305 /// executes the strategy ladder.
306 pub short_circuit_on_classified: bool,
307}
308
309impl Default for InterstitialPolicy {
310 fn default() -> Self {
311 Self {
312 queue_interval: Duration::from_millis(DEFAULT_QUEUE_INTERVAL_MS),
313 queue_max_retries: DEFAULT_QUEUE_MAX_RETRIES,
314 challenge_solve_budget: Duration::from_millis(DEFAULT_CHALLENGE_SOLVE_BUDGET_MS),
315 hard_block_escalation: DEFAULT_HARD_BLOCK_ESCALATION,
316 transient_follow_redirect: DEFAULT_TRANSIENT_FOLLOW_REDIRECT,
317 max_transient_hops: DEFAULT_MAX_TRANSIENT_HOPS,
318 short_circuit_on_classified: true,
319 }
320 }
321}
322
323impl InterstitialPolicy {
324 /// Build a policy with an explicit queue interval.
325 #[must_use]
326 pub const fn with_queue_interval(mut self, interval: Duration) -> Self {
327 self.queue_interval = interval;
328 self
329 }
330
331 /// Build a policy with an explicit max retries value.
332 #[must_use]
333 pub const fn with_queue_max_retries(mut self, max_retries: u32) -> Self {
334 self.queue_max_retries = max_retries;
335 self
336 }
337
338 /// Build a policy with an explicit challenge solve
339 /// budget.
340 #[must_use]
341 pub const fn with_challenge_solve_budget(mut self, budget: Duration) -> Self {
342 self.challenge_solve_budget = budget;
343 self
344 }
345
346 /// Build a policy with an explicit hard-block
347 /// escalation strategy.
348 #[must_use]
349 pub const fn with_hard_block_escalation(mut self, strategy: StrategyUsed) -> Self {
350 self.hard_block_escalation = strategy;
351 self
352 }
353
354 /// Build a policy with an explicit follow-redirect flag.
355 #[must_use]
356 pub const fn with_transient_follow_redirect(mut self, follow: bool) -> Self {
357 self.transient_follow_redirect = follow;
358 self
359 }
360
361 /// Build a policy with an explicit max-hops value.
362 #[must_use]
363 pub const fn with_max_transient_hops(mut self, max_hops: u32) -> Self {
364 self.max_transient_hops = max_hops;
365 self
366 }
367
368 /// Build a policy with an explicit short-circuit flag.
369 #[must_use]
370 pub const fn with_short_circuit(mut self, short_circuit: bool) -> Self {
371 self.short_circuit_on_classified = short_circuit;
372 self
373 }
374}
375
376/// serde helper: serialise [`Duration`] as integer
377/// milliseconds.
378mod duration_ms {
379 use std::time::Duration;
380
381 use serde::{Deserialize, Deserializer, Serializer};
382
383 #[allow(clippy::cast_possible_truncation)]
384 pub fn serialize<S: Serializer>(value: &Duration, ser: S) -> Result<S::Ok, S::Error> {
385 let ms = value.as_millis();
386 let n = if ms > u128::from(u64::MAX) {
387 u64::MAX
388 } else {
389 ms as u64
390 };
391 ser.serialize_u64(n)
392 }
393
394 pub fn deserialize<'de, D: Deserializer<'de>>(de: D) -> Result<Duration, D::Error> {
395 let ms = u64::deserialize(de)?;
396 Ok(Duration::from_millis(ms))
397 }
398}