stygian_browser/interstitial_router/
router.rs1use crate::acquisition::StrategyUsed;
34
35use super::classifier::{InterstitialClassifier, PageSignature};
36use super::policy::{InterstitialKind, InterstitialPolicy, InterstitialRoute};
37use super::report::{PageSignatureEvidence, RouterDecision};
38
39#[derive(Debug, Clone)]
60pub struct InterstitialRouter {
61 classifier: InterstitialClassifier,
62 policy: InterstitialPolicy,
63}
64
65impl Default for InterstitialRouter {
66 fn default() -> Self {
67 Self::with_defaults()
68 }
69}
70
71impl InterstitialRouter {
72 #[must_use]
74 pub const fn new(policy: InterstitialPolicy) -> Self {
75 Self {
76 classifier: InterstitialClassifier::new(),
77 policy,
78 }
79 }
80
81 #[must_use]
83 pub fn with_defaults() -> Self {
84 Self::new(InterstitialPolicy::default())
85 }
86
87 #[must_use]
89 pub const fn policy(&self) -> &InterstitialPolicy {
90 &self.policy
91 }
92
93 #[must_use]
95 pub const fn with_policy(mut self, policy: InterstitialPolicy) -> Self {
96 self.policy = policy;
97 self
98 }
99
100 #[must_use]
117 pub fn classify_and_route(&self, signature: &PageSignature) -> RouterDecision {
118 let kind = self.classifier.classify(signature);
119 self.route(signature, kind)
120 }
121
122 #[must_use]
140 pub fn route(&self, signature: &PageSignature, kind: InterstitialKind) -> RouterDecision {
141 let evidence = build_evidence(signature, kind);
142 let route = self.build_route(signature, kind);
143 let reason = build_reason(signature, kind);
144 RouterDecision::new(kind, route, reason, evidence)
145 }
146
147 fn build_route(&self, signature: &PageSignature, kind: InterstitialKind) -> InterstitialRoute {
148 match kind {
149 InterstitialKind::Queue => InterstitialRoute::WaitAndRetry {
150 interval: self.policy.queue_interval,
151 max_retries: self.policy.queue_max_retries,
152 queue_position: signature.queue_position_hint,
153 },
154 InterstitialKind::Challenge => InterstitialRoute::ChallengeSolve {
155 vendor_hint: signature.vendor_hint.clone(),
156 allowed_strategies: allowed_strategies_for_challenge(),
157 solve_budget: self.policy.challenge_solve_budget,
158 },
159 InterstitialKind::HardBlock => InterstitialRoute::HardBlock {
160 escalate_to: self.policy.hard_block_escalation,
161 rotate_session: true,
162 refresh_sticky: true,
163 },
164 InterstitialKind::Transient => InterstitialRoute::Transient {
165 follow_redirect: self.policy.transient_follow_redirect,
166 max_hops: self.policy.max_transient_hops,
167 },
168 }
169 }
170
171 #[must_use]
177 pub const fn should_short_circuit(&self, kind: InterstitialKind) -> bool {
178 self.policy.short_circuit_on_classified && !matches!(kind, InterstitialKind::Transient)
179 }
180}
181
182#[must_use]
199pub fn classify_and_route(signature: &PageSignature) -> RouterDecision {
200 InterstitialRouter::with_defaults().classify_and_route(signature)
201}
202
203#[must_use]
206pub fn route(signature: &PageSignature, kind: InterstitialKind) -> RouterDecision {
207 InterstitialRouter::with_defaults().route(signature, kind)
208}
209
210fn allowed_strategies_for_challenge() -> Vec<StrategyUsed> {
211 vec![
212 StrategyUsed::BrowserLightStealth,
213 StrategyUsed::StickyProxyBrowserSession,
214 ]
215}
216
217fn build_evidence(signature: &PageSignature, kind: InterstitialKind) -> PageSignatureEvidence {
218 let host = signature.host();
219 let matched_url_patterns = match kind {
220 InterstitialKind::HardBlock => {
221 url_pattern_matches(signature, super::classifier::HARD_BLOCK_URL_PATTERNS_PUBLIC)
222 }
223 InterstitialKind::Challenge => {
224 url_pattern_matches(signature, super::classifier::CHALLENGE_URL_PATTERNS_PUBLIC)
225 }
226 InterstitialKind::Queue => {
227 url_pattern_matches(signature, super::classifier::QUEUE_URL_PATTERNS_PUBLIC)
228 }
229 InterstitialKind::Transient => Vec::new(),
230 };
231 let matched_body_markers = body_marker_matches(signature, kind);
232 let matched_headers = match kind {
233 InterstitialKind::Challenge => {
234 header_matches(signature, super::classifier::CHALLENGE_HEADERS_PUBLIC)
235 }
236 _ => Vec::new(),
237 };
238 PageSignatureEvidence {
239 host,
240 status_code: signature.status_code,
241 matched_url_patterns,
242 matched_body_markers,
243 matched_headers,
244 queue_position: signature.queue_position_hint,
245 vendor_hint: signature.vendor_hint.clone(),
246 }
247}
248
249fn url_pattern_matches(signature: &PageSignature, patterns: &[&str]) -> Vec<String> {
250 patterns
251 .iter()
252 .filter(|p| signature.url_contains(p))
253 .map(|p| (*p).to_string())
254 .collect()
255}
256
257fn body_marker_matches(signature: &PageSignature, kind: InterstitialKind) -> Vec<String> {
258 let catalog: &[&str] = match kind {
259 InterstitialKind::HardBlock => super::classifier::HARD_BLOCK_BODY_MARKERS_PUBLIC,
260 InterstitialKind::Challenge => super::classifier::CHALLENGE_BODY_MARKERS_PUBLIC,
261 InterstitialKind::Queue => super::classifier::QUEUE_BODY_MARKERS_PUBLIC,
262 InterstitialKind::Transient => &[],
263 };
264 catalog
265 .iter()
266 .filter(|m| signature.body_contains(m))
267 .map(|m| (*m).to_string())
268 .collect()
269}
270
271fn header_matches(signature: &PageSignature, names: &[&str]) -> Vec<String> {
272 names
273 .iter()
274 .filter(|n| signature.has_header(n))
275 .map(|n| (*n).to_string())
276 .collect()
277}
278
279fn build_reason(signature: &PageSignature, kind: InterstitialKind) -> String {
280 let host = signature.host().unwrap_or_else(|| "<unknown>".to_string());
281 match kind {
282 InterstitialKind::Queue => format!(
283 "queue page observed on {host} (url={})",
284 truncate_url(&signature.url)
285 ),
286 InterstitialKind::Challenge => format!(
287 "challenge interstitial observed on {host} (url={})",
288 truncate_url(&signature.url)
289 ),
290 InterstitialKind::HardBlock => format!(
291 "hard block observed on {host} (url={})",
292 truncate_url(&signature.url)
293 ),
294 InterstitialKind::Transient => format!(
295 "transient redirect observed on {host} (url={})",
296 truncate_url(&signature.url)
297 ),
298 }
299}
300
/// Shortens `url` for inclusion in a human-readable reason string.
///
/// URLs of at most 128 bytes are returned unchanged. Longer URLs are cut to at
/// most 128 bytes and suffixed with `…`. The cut point is moved back to the
/// nearest UTF-8 character boundary, so a multi-byte character straddling the
/// limit cannot trigger a slicing panic.
fn truncate_url(url: &str) -> String {
    const MAX: usize = 128;
    if url.len() <= MAX {
        return url.to_string();
    }
    // Index 0 is always a char boundary, so the search cannot come up empty;
    // `unwrap_or(0)` merely avoids a panic path.
    let end = (0..=MAX)
        .rev()
        .find(|&idx| url.is_char_boundary(idx))
        .unwrap_or(0);
    let head = url.get(..end).unwrap_or_default();
    format!("{head}…")
}
309
#[cfg(test)]
// Test code is allowed to panic, unwrap and index freely.
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::super::policy::{
        DEFAULT_HARD_BLOCK_ESCALATION, DEFAULT_MAX_TRANSIENT_HOPS, DEFAULT_QUEUE_MAX_RETRIES,
        DEFAULT_TRANSIENT_FOLLOW_REDIRECT, InterstitialSeverity,
    };
    use super::*;

    /// A queue page routes to `WaitAndRetry` carrying the default retry count
    /// and the queue position from the signature.
    #[test]
    fn route_returns_wait_and_retry_for_queue() {
        let router = InterstitialRouter::with_defaults();
        let signature = PageSignature::new("https://example.com/queue", Some(200))
            .with_body_marker("please wait")
            .with_queue_position(5);

        let decision = router.route(&signature, InterstitialKind::Queue);

        assert_eq!(decision.kind(), InterstitialKind::Queue);
        assert_eq!(decision.severity(), InterstitialSeverity::Retryable);
        let InterstitialRoute::WaitAndRetry {
            max_retries,
            queue_position,
            ..
        } = decision.route()
        else {
            panic!("expected WaitAndRetry, got {:?}", decision.route());
        };
        assert_eq!(*max_retries, DEFAULT_QUEUE_MAX_RETRIES);
        assert_eq!(*queue_position, Some(5));
    }

    /// A challenge routes to `ChallengeSolve`, forwarding the vendor hint and
    /// offering the sticky-proxy browser strategy.
    #[test]
    fn route_returns_challenge_solve_for_challenge() {
        let router = InterstitialRouter::with_defaults();
        let signature = PageSignature::new(
            "https://example.com/cdn-cgi/challenge-platform/h/b",
            Some(403),
        )
        .with_body_marker("cf-chl-bypass")
        .with_vendor_hint("cloudflare");

        let decision = router.route(&signature, InterstitialKind::Challenge);

        assert_eq!(decision.kind(), InterstitialKind::Challenge);
        assert_eq!(decision.severity(), InterstitialSeverity::RequiresSolve);
        let InterstitialRoute::ChallengeSolve {
            vendor_hint,
            allowed_strategies,
            ..
        } = decision.route()
        else {
            panic!("expected ChallengeSolve, got {:?}", decision.route());
        };
        assert_eq!(vendor_hint.as_deref(), Some("cloudflare"));
        assert!(allowed_strategies.contains(&StrategyUsed::StickyProxyBrowserSession));
    }

    /// A hard block is terminal and routes to `HardBlock` with the default
    /// escalation plus session rotation and sticky refresh.
    #[test]
    fn route_returns_hard_block_strategy_for_hardblock() {
        let router = InterstitialRouter::with_defaults();
        let signature = PageSignature::new("https://example.com/blocked", Some(403))
            .with_body_marker("access denied");

        let decision = router.route(&signature, InterstitialKind::HardBlock);

        assert_eq!(decision.kind(), InterstitialKind::HardBlock);
        assert_eq!(decision.severity(), InterstitialSeverity::Terminal);
        assert!(decision.is_terminal());
        let InterstitialRoute::HardBlock {
            escalate_to,
            rotate_session,
            refresh_sticky,
        } = decision.route()
        else {
            panic!("expected HardBlock, got {:?}", decision.route());
        };
        assert_eq!(*escalate_to, DEFAULT_HARD_BLOCK_ESCALATION);
        assert!(*rotate_session);
        assert!(*refresh_sticky);
    }

    /// A transient page routes to `Transient` with the default redirect
    /// settings.
    #[test]
    fn route_returns_transient_strategy_for_transient() {
        let router = InterstitialRouter::with_defaults();
        let signature = PageSignature::new("https://example.com/redirect", Some(302));

        let decision = router.route(&signature, InterstitialKind::Transient);

        assert_eq!(decision.kind(), InterstitialKind::Transient);
        assert_eq!(decision.severity(), InterstitialSeverity::Retryable);
        let InterstitialRoute::Transient {
            follow_redirect,
            max_hops,
        } = decision.route()
        else {
            panic!("expected Transient, got {:?}", decision.route());
        };
        assert_eq!(*follow_redirect, DEFAULT_TRANSIENT_FOLLOW_REDIRECT);
        assert_eq!(*max_hops, DEFAULT_MAX_TRANSIENT_HOPS);
    }

    /// With defaults every kind except `Transient` short-circuits; turning the
    /// policy flag off disables short-circuiting for hard blocks too.
    #[test]
    fn should_short_circuit_skips_transient() {
        let router = InterstitialRouter::with_defaults();
        for kind in [
            InterstitialKind::Queue,
            InterstitialKind::Challenge,
            InterstitialKind::HardBlock,
        ] {
            assert!(router.should_short_circuit(kind));
        }
        assert!(!router.should_short_circuit(InterstitialKind::Transient));

        let lenient_policy = InterstitialPolicy {
            short_circuit_on_classified: false,
            ..InterstitialPolicy::default()
        };
        let lenient = InterstitialRouter::with_defaults().with_policy(lenient_policy);
        assert!(!lenient.should_short_circuit(InterstitialKind::HardBlock));
    }

    /// Two independently built but identical signatures produce equal
    /// decisions, field by field.
    #[test]
    fn determinism_identical_signatures_yield_identical_decisions() {
        let router = InterstitialRouter::with_defaults();
        let blocked_signature = || {
            PageSignature::new("https://example.com/blocked", Some(403))
                .with_body_marker("access denied")
                .with_vendor_hint("cloudflare")
        };
        let first = router.classify_and_route(&blocked_signature());
        let second = router.classify_and_route(&blocked_signature());

        assert_eq!(first, second);
        assert_eq!(first.kind(), second.kind());
        assert_eq!(first.severity(), second.severity());
        assert_eq!(first.route(), second.route());
        assert_eq!(first.reason(), second.reason());
    }

    /// Queue pages are reported as retryable and hard blocks as terminal, and
    /// never the other way round.
    #[test]
    fn observability_distinguishes_queue_from_hard_block() {
        let router = InterstitialRouter::with_defaults();
        let queue_page = PageSignature::new("https://example.com/queue", Some(200))
            .with_body_marker("please wait");
        let blocked_page = PageSignature::new("https://example.com/blocked", Some(403))
            .with_body_marker("access denied");

        let queue_decision = router.classify_and_route(&queue_page);
        let hard_block_decision = router.classify_and_route(&blocked_page);

        assert_eq!(queue_decision.severity(), InterstitialSeverity::Retryable);
        assert_eq!(
            hard_block_decision.severity(),
            InterstitialSeverity::Terminal
        );
        assert!(queue_decision.is_retryable());
        assert!(hard_block_decision.is_terminal());
        assert!(!queue_decision.is_terminal());
        assert!(!hard_block_decision.is_retryable());
    }
}