Skip to main content

stygian_browser/validation/
mod.rs

1//! Anti-bot service validation suite.
2//!
3//! Provides an automated testing framework that exercises stygian-browser's
4//! stealth posture against real anti-bot detection services and open-source
5//! fingerprint observatories.
6//!
7//! # Tier structure
8//!
9//! | Tier | Services | Rate limits | CI-safe |
10//! |------|----------|------------|---------|
11//! | 1 | [`ValidationTarget::CreepJs`], [`ValidationTarget::BrowserScan`] | None (open) | Yes |
12//! | 2 | [`ValidationTarget::Kasada`], [`ValidationTarget::Cloudflare`], [`ValidationTarget::Akamai`] | Yes | `#[ignore]` |
13//! | 3 | [`ValidationTarget::FingerprintJs`], [`ValidationTarget::DataDome`], [`ValidationTarget::PerimeterX`] | Account required | Manual |
14//!
15//! # Example
16//!
17//! ```no_run
18//! use stygian_browser::validation::{ValidationSuite, ValidationTarget};
19//! use stygian_browser::pool::BrowserPool;
20//! use stygian_browser::BrowserConfig;
21//!
22//! # async fn run() -> Result<(), Box<dyn std::error::Error>> {
23//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
24//! let targets = vec![ValidationTarget::CreepJs, ValidationTarget::BrowserScan];
25//! let results = ValidationSuite::run_all(&pool, &targets).await;
26//! for r in &results {
27//!     println!("{}: passed={} score={:?}", r.target, r.passed, r.score);
28//! }
29//! # Ok(())
30//! # }
31//! ```
32
33pub mod benchmark;
34pub mod validators;
35
36use std::collections::HashMap;
37use std::fmt;
38use std::sync::Arc;
39use std::time::Duration;
40
41use serde::{Deserialize, Serialize};
42
43use crate::pool::BrowserPool;
44
45// ---------------------------------------------------------------------------
46// ValidationTarget
47// ---------------------------------------------------------------------------
48
49/// The anti-bot or fingerprint-observatory services that can be probed.
50///
51/// # Example
52///
53/// ```
54/// use stygian_browser::validation::ValidationTarget;
55///
56/// assert_eq!(ValidationTarget::CreepJs.url(), "https://abrahamjuliot.github.io/creepjs/");
57/// assert_eq!(ValidationTarget::all().len(), 8);
58/// ```
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
60#[serde(rename_all = "snake_case")]
61pub enum ValidationTarget {
62    /// `CreepJS` — open-source comprehensive fingerprint observatory (Tier 1).
63    CreepJs,
64    /// `BrowserScan` authenticity percentage (Tier 1).
65    BrowserScan,
66    /// `FingerprintJS` Pro — detects canvas/audio/WebGL inconsistency (Tier 3).
67    FingerprintJs,
68    /// Kasada — two-phase token, iframe checks (Tier 2).
69    Kasada,
70    /// Cloudflare Turnstile / Bot Management (Tier 2).
71    Cloudflare,
72    /// Akamai sensor-data collection (Tier 2).
73    Akamai,
74    /// `DataDome` — e-commerce behavioral analysis (Tier 3).
75    DataDome,
76    /// `PerimeterX` — behavioral + fingerprint (Tier 3).
77    PerimeterX,
78}
79
80impl ValidationTarget {
81    /// Canonical entry-point URL for this target.
82    ///
83    /// # Example
84    ///
85    /// ```
86    /// use stygian_browser::validation::ValidationTarget;
87    ///
88    /// assert!(ValidationTarget::CreepJs.url().starts_with("https://"));
89    /// ```
90    #[must_use]
91    pub const fn url(self) -> &'static str {
92        match self {
93            Self::CreepJs => "https://abrahamjuliot.github.io/creepjs/",
94            Self::BrowserScan => "https://www.browserscan.net/",
95            Self::FingerprintJs => "https://fingerprint.com/demo/",
96            Self::Kasada => "https://www.wizzair.com/",
97            Self::Cloudflare => "https://www.cloudflare.com/",
98            Self::Akamai => "https://www.fedex.com/",
99            Self::DataDome => "https://datadome.co/",
100            Self::PerimeterX => "https://www.humansecurity.com/",
101        }
102    }
103
104    /// Whether this target is safe to run in automated CI (Tier 1 only).
105    ///
106    /// # Example
107    ///
108    /// ```
109    /// use stygian_browser::validation::ValidationTarget;
110    ///
111    /// assert!(ValidationTarget::CreepJs.is_ci_safe());
112    /// assert!(!ValidationTarget::Kasada.is_ci_safe());
113    /// ```
114    #[must_use]
115    pub const fn is_ci_safe(self) -> bool {
116        matches!(self, Self::CreepJs | Self::BrowserScan)
117    }
118
119    /// All 8 targets, in enum declaration order.
120    ///
121    /// # Example
122    ///
123    /// ```
124    /// use stygian_browser::validation::ValidationTarget;
125    ///
126    /// assert_eq!(ValidationTarget::all().len(), 8);
127    /// ```
128    #[must_use]
129    pub const fn all() -> &'static [Self] {
130        &[
131            Self::CreepJs,
132            Self::BrowserScan,
133            Self::FingerprintJs,
134            Self::Kasada,
135            Self::Cloudflare,
136            Self::Akamai,
137            Self::DataDome,
138            Self::PerimeterX,
139        ]
140    }
141
142    /// CI-safe Tier 1 targets only.
143    ///
144    /// # Example
145    ///
146    /// ```
147    /// use stygian_browser::validation::ValidationTarget;
148    ///
149    /// assert_eq!(ValidationTarget::tier1().len(), 2);
150    /// assert!(ValidationTarget::tier1().iter().all(|t| t.is_ci_safe()));
151    /// ```
152    #[must_use]
153    pub const fn tier1() -> &'static [Self] {
154        &[Self::CreepJs, Self::BrowserScan]
155    }
156}
157
158impl fmt::Display for ValidationTarget {
159    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
160        let s = match self {
161            Self::CreepJs => "CreepJS",
162            Self::BrowserScan => "BrowserScan",
163            Self::FingerprintJs => "FingerprintJS",
164            Self::Kasada => "Kasada",
165            Self::Cloudflare => "Cloudflare",
166            Self::Akamai => "Akamai",
167            Self::DataDome => "DataDome",
168            Self::PerimeterX => "PerimeterX",
169        };
170        f.write_str(s)
171    }
172}
173
174// ---------------------------------------------------------------------------
175// ValidationResult
176// ---------------------------------------------------------------------------
177
178/// The outcome of running a single anti-bot validator.
179///
180/// # Example
181///
182/// ```
183/// use std::collections::HashMap;
184/// use std::time::Duration;
185/// use stygian_browser::validation::{ValidationResult, ValidationTarget};
186///
187/// let r = ValidationResult {
188///     target: ValidationTarget::CreepJs,
189///     passed: true,
190///     score: Some(0.87),
191///     details: HashMap::new(),
192///     screenshot: None,
193///     elapsed: Duration::from_secs(5),
194/// };
195/// assert!(r.passed);
196/// ```
197#[derive(Debug, Clone, Serialize, Deserialize)]
198pub struct ValidationResult {
199    /// Which anti-bot service was tested.
200    pub target: ValidationTarget,
201    /// Did the page pass (not blocked, score above threshold)?
202    pub passed: bool,
203    /// Normalised 0.0–1.0 score, where applicable.
204    pub score: Option<f64>,
205    /// Target-specific extracted metrics as key/value pairs.
206    pub details: HashMap<String, String>,
207    /// PNG screenshot captured on failure (base64-encoded when serialised).
208    #[serde(skip_serializing_if = "Option::is_none")]
209    pub screenshot: Option<Vec<u8>>,
210    /// Wall-clock time taken for the validation.
211    #[serde(with = "duration_secs")]
212    pub elapsed: Duration,
213}
214
215impl ValidationResult {
216    /// Construct a failure result without a screenshot.
217    ///
218    /// # Example
219    ///
220    /// ```
221    /// use stygian_browser::validation::{ValidationResult, ValidationTarget};
222    ///
223    /// let r = ValidationResult::failed(ValidationTarget::CreepJs, "timeout");
224    /// assert!(!r.passed);
225    /// assert!(r.details.contains_key("error"));
226    /// ```
227    #[must_use]
228    pub fn failed(target: ValidationTarget, reason: &str) -> Self {
229        Self {
230            target,
231            passed: false,
232            score: None,
233            details: HashMap::from([("error".to_string(), reason.to_string())]),
234            screenshot: None,
235            elapsed: Duration::ZERO,
236        }
237    }
238}
239
240// Serde helper: Duration ↔ f64 seconds
241mod duration_secs {
242    use std::time::Duration;
243
244    use serde::{Deserialize, Deserializer, Serialize, Serializer};
245
246    pub(super) fn serialize<S>(d: &Duration, ser: S) -> Result<S::Ok, S::Error>
247    where
248        S: Serializer,
249    {
250        d.as_secs_f64().serialize(ser)
251    }
252
253    pub(super) fn deserialize<'de, D>(de: D) -> Result<Duration, D::Error>
254    where
255        D: Deserializer<'de>,
256    {
257        f64::deserialize(de).map(Duration::from_secs_f64)
258    }
259}
260
261// ---------------------------------------------------------------------------
262// ValidationSuite
263// ---------------------------------------------------------------------------
264
265/// Runs one or more anti-bot validators against the given [`BrowserPool`].
266///
267/// # Example
268///
269/// ```
270/// use stygian_browser::validation::{ValidationSuite, ValidationTarget};
271///
272/// // Empty target list returns empty results immediately.
273/// ```
274pub struct ValidationSuite;
275
276impl ValidationSuite {
277    /// Run all specified targets sequentially and collect results.
278    ///
279    /// Returns immediately with an empty `Vec` if `targets` is empty.
280    ///
281    /// # Example
282    ///
283    /// ```no_run
284    /// use stygian_browser::validation::{ValidationSuite, ValidationTarget};
285    /// use stygian_browser::pool::BrowserPool;
286    /// use stygian_browser::BrowserConfig;
287    /// use std::sync::Arc;
288    ///
289    /// # async fn run() -> Result<(), Box<dyn std::error::Error>> {
290    /// let pool = Arc::new(BrowserPool::new(BrowserConfig::default()).await?);
291    /// let results = ValidationSuite::run_all(&pool, &[]).await;
292    /// assert!(results.is_empty());
293    /// # Ok(())
294    /// # }
295    /// ```
296    pub async fn run_all(
297        pool: &Arc<BrowserPool>,
298        targets: &[ValidationTarget],
299    ) -> Vec<ValidationResult> {
300        // Run sequentially to avoid saturating the browser pool.
301        let mut results = Vec::with_capacity(targets.len());
302        for &target in targets {
303            results.push(Self::run_one(pool, target).await);
304        }
305        results
306    }
307
308    /// Run a single validator and return its result.
309    ///
310    /// # Example
311    ///
312    /// ```no_run
313    /// use stygian_browser::validation::{ValidationSuite, ValidationTarget};
314    /// use stygian_browser::pool::BrowserPool;
315    /// use stygian_browser::BrowserConfig;
316    /// use std::sync::Arc;
317    ///
318    /// # async fn run() -> Result<(), Box<dyn std::error::Error>> {
319    /// let pool = Arc::new(BrowserPool::new(BrowserConfig::default()).await?);
320    /// let result = ValidationSuite::run_one(&pool, ValidationTarget::CreepJs).await;
321    /// println!("passed: {}", result.passed);
322    /// # Ok(())
323    /// # }
324    /// ```
325    pub async fn run_one(pool: &Arc<BrowserPool>, target: ValidationTarget) -> ValidationResult {
326        match target {
327            ValidationTarget::CreepJs => validators::run_creepjs(pool).await,
328            ValidationTarget::BrowserScan => validators::run_browserscan(pool).await,
329            ValidationTarget::Kasada => validators::run_kasada(pool).await,
330            ValidationTarget::Cloudflare => validators::run_cloudflare(pool).await,
331            ValidationTarget::Akamai => validators::run_akamai(pool).await,
332            // Tier 3: not automated — return a documented stub result.
333            ValidationTarget::FingerprintJs => ValidationResult::failed(
334                target,
335                "FingerprintJS Pro validation requires a Pro account — not automated",
336            ),
337            ValidationTarget::DataDome => ValidationResult::failed(
338                target,
339                "DataDome validation requires a Pro account — not automated",
340            ),
341            ValidationTarget::PerimeterX => ValidationResult::failed(
342                target,
343                "PerimeterX validation requires a Pro account — not automated",
344            ),
345        }
346    }
347}
348
349// ---------------------------------------------------------------------------
350// Tests
351// ---------------------------------------------------------------------------
352
353#[cfg(test)]
354mod tests {
355    use super::*;
356
357    // ── ValidationResult serde round-trip ──────────────────────────────────────
358
359    #[test]
360    fn result_serde_round_trip() {
361        let original = ValidationResult {
362            target: ValidationTarget::CreepJs,
363            passed: true,
364            score: Some(0.92),
365            details: HashMap::from([("trust_score".to_string(), "92%".to_string())]),
366            screenshot: None,
367            elapsed: Duration::from_millis(3800),
368        };
369
370        let json_result = serde_json::to_string(&original);
371        assert!(json_result.is_ok(), "serialize failed: {json_result:?}");
372        let Ok(json) = json_result else {
373            return;
374        };
375        let decoded_result: Result<ValidationResult, _> = serde_json::from_str(&json);
376        assert!(
377            decoded_result.is_ok(),
378            "deserialize failed: {decoded_result:?}"
379        );
380        let Ok(decoded) = decoded_result else {
381            return;
382        };
383
384        assert_eq!(decoded.target, original.target);
385        assert_eq!(decoded.passed, original.passed);
386        assert!(decoded.score.is_some(), "missing score in decoded result");
387        let Some(score) = decoded.score else {
388            return;
389        };
390        assert!((score - 0.92_f64).abs() < 1e-9);
391        let trust_score = decoded.details.get("trust_score");
392        assert_eq!(trust_score, Some(&"92%".to_string()));
393        assert!((decoded.elapsed.as_secs_f64() - 3.8_f64).abs() < 1e-6);
394    }
395
396    // ── Enum coverage ─────────────────────────────────────────────────────────
397
398    #[test]
399    fn all_targets_covered() {
400        let all = ValidationTarget::all();
401        assert_eq!(all.len(), 8, "all() must cover all 8 variants");
402
403        // Spot-check URLs are non-empty HTTPS
404        for t in all {
405            let url = t.url();
406            assert!(url.starts_with("https://"), "URL for {t} must use HTTPS");
407        }
408    }
409
410    #[test]
411    fn tier1_is_ci_safe() {
412        let tier1 = ValidationTarget::tier1();
413        assert_eq!(tier1.len(), 2);
414        for t in tier1 {
415            assert!(t.is_ci_safe(), "{t} must be CI-safe");
416        }
417    }
418
419    #[test]
420    fn tier2_not_ci_safe() {
421        let tier2 = [
422            ValidationTarget::Kasada,
423            ValidationTarget::Cloudflare,
424            ValidationTarget::Akamai,
425        ];
426        for t in tier2 {
427            assert!(!t.is_ci_safe(), "{t} must NOT be CI-safe");
428        }
429    }
430
431    // ── Display ───────────────────────────────────────────────────────────────
432
433    #[test]
434    fn display_names() {
435        assert_eq!(ValidationTarget::CreepJs.to_string(), "CreepJS");
436        assert_eq!(ValidationTarget::BrowserScan.to_string(), "BrowserScan");
437        assert_eq!(ValidationTarget::FingerprintJs.to_string(), "FingerprintJS");
438        assert_eq!(ValidationTarget::Kasada.to_string(), "Kasada");
439        assert_eq!(ValidationTarget::Cloudflare.to_string(), "Cloudflare");
440        assert_eq!(ValidationTarget::Akamai.to_string(), "Akamai");
441        assert_eq!(ValidationTarget::DataDome.to_string(), "DataDome");
442        assert_eq!(ValidationTarget::PerimeterX.to_string(), "PerimeterX");
443    }
444
445    // ── Integration (requires network + browser) ──────────────────────────────
446
447    #[tokio::test]
448    #[ignore = "requires network connectivity and a running Chrome binary"]
449    async fn live_creepjs_returns_score() {
450        use crate::BrowserConfig;
451        use crate::pool::BrowserPool;
452
453        let pool_result = BrowserPool::new(BrowserConfig::default()).await;
454        assert!(pool_result.is_ok(), "pool init failed");
455        let Ok(pool) = pool_result else {
456            return;
457        };
458        let result = ValidationSuite::run_one(&pool, ValidationTarget::CreepJs).await;
459        assert!(
460            result.score.is_some(),
461            "CreepJS should return a score: {:?}",
462            result.details
463        );
464    }
465
466    #[tokio::test]
467    #[ignore = "requires network connectivity and a running Chrome binary"]
468    async fn live_browserscan_returns_percentage() {
469        use crate::BrowserConfig;
470        use crate::pool::BrowserPool;
471
472        let pool_result = BrowserPool::new(BrowserConfig::default()).await;
473        assert!(pool_result.is_ok(), "pool init failed");
474        let Ok(pool) = pool_result else {
475            return;
476        };
477        let result = ValidationSuite::run_one(&pool, ValidationTarget::BrowserScan).await;
478        assert!(
479            result.score.is_some(),
480            "BrowserScan should return a score: {:?}",
481            result.details
482        );
483    }
484
485    #[tokio::test]
486    #[ignore = "requires network connectivity and a running Chrome binary"]
487    async fn tier1_non_regression_against_optional_baseline() {
488        use crate::BrowserConfig;
489        use crate::pool::BrowserPool;
490        use std::sync::Arc;
491
492        let pool_result = BrowserPool::new(BrowserConfig::default()).await;
493        assert!(pool_result.is_ok(), "pool init failed");
494        let Ok(pool) = pool_result else {
495            return;
496        };
497        let pool = Arc::new(pool);
498
499        let results = ValidationSuite::run_all(&pool, ValidationTarget::tier1()).await;
500        assert_eq!(results.len(), 2, "tier1 should execute exactly two targets");
501
502        for result in &results {
503            assert!(
504                result.score.is_some(),
505                "{} should return a score for baseline comparison: {:?}",
506                result.target,
507                result.details
508            );
509        }
510
511        // Optional per-target baseline scores can be supplied by CI or local runners.
512        // If omitted, this test still validates Tier1 score extraction correctness.
513        let creepjs_baseline = std::env::var("STYGIAN_TIER1_BASELINE_CREEPJS")
514            .ok()
515            .and_then(|v| v.parse::<f64>().ok());
516        let browserscan_baseline = std::env::var("STYGIAN_TIER1_BASELINE_BROWSERSCAN")
517            .ok()
518            .and_then(|v| v.parse::<f64>().ok());
519
520        for result in results {
521            let Some(score) = result.score else {
522                continue;
523            };
524            match result.target {
525                ValidationTarget::CreepJs => {
526                    if let Some(baseline) = creepjs_baseline {
527                        assert!(
528                            score >= baseline,
529                            "CreepJS score regressed: score={score:.4}, baseline={baseline:.4}, details={:?}",
530                            result.details
531                        );
532                    }
533                }
534                ValidationTarget::BrowserScan => {
535                    if let Some(baseline) = browserscan_baseline {
536                        assert!(
537                            score >= baseline,
538                            "BrowserScan score regressed: score={score:.4}, baseline={baseline:.4}, details={:?}",
539                            result.details
540                        );
541                    }
542                }
543                _ => {}
544            }
545        }
546    }
547
548    #[tokio::test]
549    #[ignore = "requires network connectivity and a running Chrome binary"]
550    async fn live_about_blank_webgl_vendor_not_swiftshader() {
551        use crate::BrowserConfig;
552        use crate::WaitUntil;
553        use crate::config::StealthLevel;
554        use crate::diagnostic::CheckId;
555        use crate::pool::BrowserPool;
556
557        let config = BrowserConfig::builder()
558            .headless(true)
559            .stealth_level(StealthLevel::Advanced)
560            .build();
561
562        let pool_result = BrowserPool::new(config).await;
563        assert!(pool_result.is_ok(), "pool init failed");
564        let Ok(pool) = pool_result else {
565            return;
566        };
567
568        let handle_result = pool.acquire().await;
569        assert!(handle_result.is_ok(), "acquire failed");
570        let Ok(handle) = handle_result else {
571            return;
572        };
573
574        let browser = handle.browser();
575        assert!(browser.is_some(), "browser handle no longer valid");
576        let Some(browser) = browser else {
577            handle.release().await;
578            return;
579        };
580
581        let page_result = browser.new_page().await;
582        assert!(page_result.is_ok(), "new_page failed");
583        let Ok(mut page) = page_result else {
584            handle.release().await;
585            return;
586        };
587
588        let nav_result = page
589            .navigate(
590                "about:blank",
591                WaitUntil::DomContentLoaded,
592                Duration::from_secs(20),
593            )
594            .await;
595        assert!(nav_result.is_ok(), "navigate failed: {nav_result:?}");
596        let verify_result = page.verify_stealth().await;
597        assert!(
598            verify_result.is_ok(),
599            "verify_stealth failed: {verify_result:?}"
600        );
601
602        let Ok(report) = verify_result else {
603            let _ = page.close().await;
604            handle.release().await;
605            return;
606        };
607
608        let webgl_check = report.checks.iter().find(|c| c.id == CheckId::WebGlVendor);
609        assert!(
610            webgl_check.is_some(),
611            "web_gl_vendor check missing from report"
612        );
613        let Some(webgl_check) = webgl_check else {
614            let _ = page.close().await;
615            handle.release().await;
616            return;
617        };
618
619        assert!(
620            webgl_check.passed,
621            "web_gl_vendor failed: {}",
622            webgl_check.details
623        );
624        assert!(
625            !webgl_check.details.contains("SwiftShader"),
626            "web_gl_vendor details still expose SwiftShader: {}",
627            webgl_check.details
628        );
629
630        let _ = page.close().await;
631        handle.release().await;
632    }
633
634    #[tokio::test]
635    #[ignore = "requires network connectivity and a running Chrome binary"]
636    async fn live_kasada_wizzair_not_blocked() {
637        use crate::BrowserConfig;
638        use crate::pool::BrowserPool;
639
640        let pool_result = BrowserPool::new(BrowserConfig::default()).await;
641        assert!(pool_result.is_ok(), "pool init failed");
642        let Ok(pool) = pool_result else {
643            return;
644        };
645        let result = ValidationSuite::run_one(&pool, ValidationTarget::Kasada).await;
646        assert!(
647            result.passed,
648            "WizzAir should not block us: {:?}",
649            result.details
650        );
651    }
652}