Skip to main content

stygian_browser/validation/
validators.rs

1//! Individual anti-bot validator implementations.
2
3use std::collections::HashMap;
4use std::sync::Arc;
5use std::time::Duration;
6use std::time::Instant;
7
8use serde_json::{Value, json};
9use tokio::time::sleep;
10use tracing::debug;
11
12use crate::page::WaitUntil;
13use crate::pool::BrowserPool;
14
15use super::{ValidationResult, ValidationTarget};
16
17// ───────────────────────────────────────────────────────────────────────────
18// Tier 1: Open-Source Observatories (no rate limits)
19// ───────────────────────────────────────────────────────────────────────────
20
21/// Run the `CreepJS` observatory validator.
22///
23/// Navigates to `CreepJS`, waits for results, extracts the trust score, and
24/// checks if it is > 50%.
25pub async fn run_creepjs(pool: &Arc<BrowserPool>) -> ValidationResult {
26    let start = Instant::now();
27    let result = creepjs_impl(pool).await;
28    ValidationResult {
29        elapsed: start.elapsed(),
30        ..result
31    }
32}
33
34async fn creepjs_impl(pool: &Arc<BrowserPool>) -> ValidationResult {
35    run_tier1_observatory(pool, ValidationTarget::CreepJs, 0.50).await
36}
37
38/// Run the `BrowserScan` validator.
39///
40/// Navigates to `BrowserScan`, waits for scan completion, and extracts the
41/// authenticity percentage.
42pub async fn run_browserscan(pool: &Arc<BrowserPool>) -> ValidationResult {
43    let start = Instant::now();
44    let result = browserscan_impl(pool).await;
45    ValidationResult {
46        elapsed: start.elapsed(),
47        ..result
48    }
49}
50
51async fn browserscan_impl(pool: &Arc<BrowserPool>) -> ValidationResult {
52    run_tier1_observatory(pool, ValidationTarget::BrowserScan, 0.90).await
53}
54
55async fn run_tier1_observatory(
56    pool: &Arc<BrowserPool>,
57    target: ValidationTarget,
58    min_score: f64,
59) -> ValidationResult {
60    let mut details = HashMap::new();
61    let url = target.url();
62    details.insert("phase".to_string(), "tier1-observatory".to_string());
63    details.insert("url".to_string(), url.to_string());
64
65    let session = match pool.acquire().await {
66        Ok(session) => session,
67        Err(err) => return ValidationResult::failed(target, &err.to_string()),
68    };
69
70    let mut screenshot: Option<Vec<u8>> = None;
71    let mut passed = false;
72    let mut score: Option<f64> = None;
73
74    let result = match session.browser() {
75        Some(browser) => match browser.new_page().await {
76            Ok(mut page) => {
77                let navigate_result = page
78                    .navigate(url, WaitUntil::DomContentLoaded, Duration::from_secs(25))
79                    .await;
80
81                match navigate_result {
82                    Ok(()) => {
83                        // Give observatories time to execute browser fingerprint checks.
84                        sleep(Duration::from_secs(6)).await;
85
86                        let probe = page
87                            .eval::<Value>(
88                                r#"(() => {
89                                    const body = (document.body?.innerText || "").toLowerCase();
90                                    const title = (document.title || "");
91                                    const href = (location.href || "");
92
93                                    const blocked =
94                                        body.includes("access denied") ||
95                                        body.includes("verify you are human") ||
96                                        body.includes("just a moment") ||
97                                        body.includes("captcha") ||
98                                        href.toLowerCase().includes("/js_challenge");
99
100                                    const scorePatterns = [
101                                        /trust\s*score[^0-9]{0,20}([0-9]{1,3}(?:\.[0-9]+)?)/i,
102                                        /authenticity[^0-9]{0,20}([0-9]{1,3}(?:\.[0-9]+)?)/i,
103                                        /score[^0-9]{0,20}([0-9]{1,3}(?:\.[0-9]+)?)/i,
104                                        /([0-9]{1,3}(?:\.[0-9]+)?)\s*%/
105                                    ];
106
107                                    let score = null;
108                                    for (const pattern of scorePatterns) {
109                                        const match = body.match(pattern);
110                                        if (match?.[1]) {
111                                            score = Number(match[1]);
112                                            if (Number.isFinite(score)) break;
113                                        }
114                                    }
115
116                                    return {
117                                        blocked,
118                                        title,
119                                        href,
120                                        score
121                                    };
122                                })()"#,
123                            )
124                            .await
125                            .unwrap_or_else(|_| json!({"blocked": false, "score": Value::Null}));
126
127                        let blocked = probe
128                            .get("blocked")
129                            .and_then(Value::as_bool)
130                            .unwrap_or(false);
131                        score = probe
132                            .get("score")
133                            .and_then(Value::as_f64)
134                            .map(|raw| if raw > 1.0 { raw / 100.0 } else { raw });
135
136                        if let Some(title) = probe.get("title").and_then(Value::as_str) {
137                            details.insert("title".to_string(), title.to_string());
138                        }
139                        if let Some(observed_url) = probe.get("href").and_then(Value::as_str) {
140                            details.insert("observed_url".to_string(), observed_url.to_string());
141                        }
142                        details.insert("blocked".to_string(), blocked.to_string());
143
144                        passed = !blocked && score.is_some_and(|v| v >= min_score);
145                        if !passed {
146                            screenshot = page.screenshot().await.ok();
147                        }
148                    }
149                    Err(err) => {
150                        details.insert("error".to_string(), err.to_string());
151                    }
152                }
153
154                page.close().await.ok();
155                ValidationResult {
156                    target,
157                    passed,
158                    score,
159                    details,
160                    screenshot,
161                    elapsed: Duration::ZERO,
162                }
163            }
164            Err(err) => ValidationResult::failed(target, &err.to_string()),
165        },
166        None => ValidationResult::failed(target, "browser handle lost"),
167    };
168
169    session.release().await;
170    result
171}
172
173// ───────────────────────────────────────────────────────────────────────────
174// Tier 2: Anti-Bot Protected Sites (may rate-limit, use #[ignore])
175// ───────────────────────────────────────────────────────────────────────────
176
177/// Run the Kasada validator against `WizzAir` booking page.
178///
179/// Navigates to a Kasada-protected page, waits for page load, and checks
180/// whether a 429/403 block page is returned or the page loads normally.
181pub async fn run_kasada(pool: &Arc<BrowserPool>) -> ValidationResult {
182    let start = Instant::now();
183    let result = kasada_impl(pool).await;
184    ValidationResult {
185        elapsed: start.elapsed(),
186        ..result
187    }
188}
189
190async fn kasada_impl(pool: &Arc<BrowserPool>) -> ValidationResult {
191    let url = ValidationTarget::Kasada.url();
192    debug!("Kasada validator: navigating to {url}");
193
194    match pool.acquire().await {
195        Ok(session) => {
196            match session.browser() {
197                Some(browser) => {
198                    match browser.new_page().await {
199                        Ok(mut page) => {
200                            // Try to navigate with a generous timeout
201                            let navigate_result = page
202                                .navigate(
203                                    url,
204                                    WaitUntil::DomContentLoaded,
205                                    std::time::Duration::from_secs(20),
206                                )
207                                .await;
208
209                            let passed = match navigate_result {
210                                Ok(()) => {
211                                    // Check HTTP status code — 200 OK is a pass
212                                    true
213                                }
214                                Err(e) => {
215                                    // Navigation timeout or network error typically means blocked
216                                    debug!("Kasada: navigation failed: {}", e);
217                                    false
218                                }
219                            };
220
221                            page.close().await.ok();
222
223                            ValidationResult {
224                                target: ValidationTarget::Kasada,
225                                passed,
226                                score: None,
227                                details: HashMap::from([(
228                                    "phase".to_string(),
229                                    "load-check".to_string(),
230                                )]),
231                                screenshot: None,
232                                elapsed: std::time::Duration::ZERO,
233                            }
234                        }
235                        Err(e) => {
236                            ValidationResult::failed(ValidationTarget::Kasada, &e.to_string())
237                        }
238                    }
239                }
240                None => ValidationResult::failed(ValidationTarget::Kasada, "browser handle lost"),
241            }
242        }
243        Err(e) => ValidationResult::failed(ValidationTarget::Kasada, &e.to_string()),
244    }
245}
246
247/// Run the Cloudflare validator on a CF-protected site.
248///
249/// Navigates to a Cloudflare-protected page and checks if the page loads
250/// without a challenge block.
251pub async fn run_cloudflare(pool: &Arc<BrowserPool>) -> ValidationResult {
252    let start = Instant::now();
253    let result = cloudflare_impl(pool).await;
254    ValidationResult {
255        elapsed: start.elapsed(),
256        ..result
257    }
258}
259
260async fn cloudflare_impl(pool: &Arc<BrowserPool>) -> ValidationResult {
261    let url = ValidationTarget::Cloudflare.url();
262    debug!("Cloudflare validator: navigating to {url}");
263
264    match pool.acquire().await {
265        Ok(session) => match session.browser() {
266            Some(browser) => match browser.new_page().await {
267                Ok(mut page) => {
268                    let navigate_result = page
269                        .navigate(
270                            url,
271                            WaitUntil::DomContentLoaded,
272                            std::time::Duration::from_secs(20),
273                        )
274                        .await;
275
276                    let passed = navigate_result.is_ok();
277
278                    page.close().await.ok();
279
280                    ValidationResult {
281                        target: ValidationTarget::Cloudflare,
282                        passed,
283                        score: None,
284                        details: HashMap::from([("phase".to_string(), "load-check".to_string())]),
285                        screenshot: None,
286                        elapsed: std::time::Duration::ZERO,
287                    }
288                }
289                Err(e) => ValidationResult::failed(ValidationTarget::Cloudflare, &e.to_string()),
290            },
291            None => ValidationResult::failed(ValidationTarget::Cloudflare, "browser handle lost"),
292        },
293        Err(e) => ValidationResult::failed(ValidationTarget::Cloudflare, &e.to_string()),
294    }
295}
296
297/// Run the Akamai validator on an Akamai-protected site (e.g., `FedEx`).
298///
299/// Navigates to the `FedEx` tracking page and checks if the page loads
300/// without bot detection.
301pub async fn run_akamai(pool: &Arc<BrowserPool>) -> ValidationResult {
302    let start = Instant::now();
303    let result = akamai_impl(pool).await;
304    ValidationResult {
305        elapsed: start.elapsed(),
306        ..result
307    }
308}
309
310async fn akamai_impl(pool: &Arc<BrowserPool>) -> ValidationResult {
311    let url = ValidationTarget::Akamai.url();
312    debug!("Akamai validator: navigating to {url}");
313
314    match pool.acquire().await {
315        Ok(session) => match session.browser() {
316            Some(browser) => match browser.new_page().await {
317                Ok(mut page) => {
318                    let navigate_result = page
319                        .navigate(
320                            url,
321                            WaitUntil::DomContentLoaded,
322                            std::time::Duration::from_secs(20),
323                        )
324                        .await;
325
326                    let passed = navigate_result.is_ok();
327
328                    page.close().await.ok();
329
330                    ValidationResult {
331                        target: ValidationTarget::Akamai,
332                        passed,
333                        score: None,
334                        details: HashMap::from([("phase".to_string(), "load-check".to_string())]),
335                        screenshot: None,
336                        elapsed: std::time::Duration::ZERO,
337                    }
338                }
339                Err(e) => ValidationResult::failed(ValidationTarget::Akamai, &e.to_string()),
340            },
341            None => ValidationResult::failed(ValidationTarget::Akamai, "browser handle lost"),
342        },
343        Err(e) => ValidationResult::failed(ValidationTarget::Akamai, &e.to_string()),
344    }
345}