Skip to main content

stygian_browser/
page.rs

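//! Page-level handles for driving a browser tab and inspecting its DOM.
//!
//! ## Resource blocking
//!
//! A [`ResourceFilter`] lists the resource types (images, fonts, stylesheets,
//! media) that should not be loaded. Install it with
//! `PageHandle::set_resource_filter` before navigating, as in the example
//! below.
//!
//! ## Wait strategies
//!
//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
//! - `DomContentLoaded` — fires when the HTML is parsed
//! - `NetworkIdle` — waits for the `load` event, then until at most 2 requests
//!   remain in flight across a 500 ms settle window
//! - `Selector(css)` — polls until an element matching the CSS selector exists
//!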
9//! # Example
10//!
11//! ```no_run
12//! use stygian_browser::{BrowserPool, BrowserConfig};
13//! use stygian_browser::page::{ResourceFilter, WaitUntil};
14//! use std::time::Duration;
15//!
16//! # async fn run() -> stygian_browser::error::Result<()> {
17//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
18//! let handle = pool.acquire().await?;
19//!
20//! let mut page = handle.browser().expect("valid browser").new_page().await?;
21//! page.set_resource_filter(ResourceFilter::block_media()).await?;
22//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
23//! let title = page.title().await?;
24//! println!("title: {title}");
25//! handle.release().await;
26//! # Ok(())
27//! # }
28//! ```
29
30use std::collections::HashMap;
31use std::sync::{
32    Arc,
33    atomic::{AtomicU16, Ordering},
34};
35use std::time::Duration;
36
37use chromiumoxide::Page;
38use serde::{Deserialize, Serialize};
39use tokio::time::timeout;
40use tracing::{debug, warn};
41
42use crate::error::{BrowserError, Result};
43
44// ─── ResourceType ─────────────────────────────────────────────────────────────
45
/// CDP resource types that can be intercepted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ResourceType {
    /// `<img>`, `<picture>`, background images
    Image,
    /// Web fonts loaded via CSS `@font-face`
    Font,
    /// External CSS stylesheets
    Stylesheet,
    /// Media files (audio/video)
    Media,
}

impl ResourceType {
    /// Return the name this resource type is matched against in
    /// [`ResourceFilter::should_block`].
    #[must_use]
    pub const fn as_cdp_str(&self) -> &'static str {
        match self {
            Self::Image => "Image",
            Self::Font => "Font",
            Self::Stylesheet => "Stylesheet",
            Self::Media => "Media",
        }
    }
}

// ─── ResourceFilter ───────────────────────────────────────────────────────────

/// A set of [`ResourceType`]s whose requests should be blocked.
///
/// The default filter is empty and blocks nothing.
///
/// # Example
///
/// ```
/// use stygian_browser::page::ResourceFilter;
/// let filter = ResourceFilter::block_media();
/// assert!(filter.should_block("Image"));
/// ```
#[derive(Debug, Clone, Default)]
pub struct ResourceFilter {
    /// Resource types to block; kept free of duplicates by [`Self::block`].
    blocked: Vec<ResourceType>,
}

impl ResourceFilter {
    /// Block all media resources (images, fonts, CSS, audio/video).
    #[must_use]
    pub fn block_media() -> Self {
        Self {
            blocked: vec![
                ResourceType::Image,
                ResourceType::Font,
                ResourceType::Stylesheet,
                ResourceType::Media,
            ],
        }
    }

    /// Block images and web fonts only; stylesheets and audio/video still load.
    #[must_use]
    pub fn block_images_and_fonts() -> Self {
        Self {
            blocked: vec![ResourceType::Image, ResourceType::Font],
        }
    }

    /// Builder-style addition of one more resource type to block.
    ///
    /// Adding a type that is already present leaves the filter unchanged.
    #[must_use]
    pub fn block(mut self, resource: ResourceType) -> Self {
        if !self.blocked.contains(&resource) {
            self.blocked.push(resource);
        }
        self
    }

    /// Return `true` when `cdp_type` names a blocked resource type.
    ///
    /// The comparison is ASCII case-insensitive, so `"image"` and `"Image"`
    /// are treated the same.
    #[must_use]
    pub fn should_block(&self, cdp_type: &str) -> bool {
        self.blocked
            .iter()
            .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
    }

    /// Return `true` when the filter blocks nothing.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.blocked.is_empty()
    }
}
122
123// ─── WaitUntil ────────────────────────────────────────────────────────────────
124
/// Specifies what condition to wait for after a page navigation.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
///
/// let until = WaitUntil::Selector("#content".to_string());
/// assert_eq!(until, WaitUntil::Selector("#content".to_string()));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WaitUntil {
    /// Fires when the initial HTML is fully parsed, without waiting for
    /// subresources such as images and stylesheets to finish loading.
    DomContentLoaded,
    /// Wait for the page `load` event and then for in-flight network requests
    /// to settle.
    NetworkIdle,
    /// Wait until an element matching the given CSS selector can be found on
    /// the page.
    Selector(String),
}
140
141// ─── NodeHandle ───────────────────────────────────────────────────────────────
142
143///
144/// more CDP `Runtime.callFunctionOn` calls against the held V8 remote object
145/// reference — no HTML serialisation occurs.
146///
147/// A handle becomes **stale** after page navigation or if the underlying DOM
148/// node is removed.  Stale calls return [`BrowserError::StaleNode`] so callers
149/// can distinguish them from other CDP failures.
150///
151/// # Example
152///
153/// ```no_run
154/// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
155/// use std::time::Duration;
156///
157/// # async fn run() -> stygian_browser::error::Result<()> {
158/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
159/// let handle = pool.acquire().await?;
160/// let mut page = handle.browser().expect("valid browser").new_page().await?;
161/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
162/// # let nodes = page.query_selector_all("a").await?;
163/// # for node in &nodes {
164///     let href = node.attr("href").await?;
165///     let text = node.text_content().await?;
166///     println!("{text}: {href:?}");
167/// # }
168/// # Ok(())
169/// # }
170/// ```
171pub struct NodeHandle {
172    element: chromiumoxide::element::Element,
173    /// Shared via `Arc<str>` so all handles from a single query reuse the
174    /// same allocation rather than cloning a `String` per node.
175    selector: Arc<str>,
176    cdp_timeout: Duration,
177    /// during DOM traversal (parent / sibling navigation).
178    page: chromiumoxide::Page,
179}
180
181impl NodeHandle {
182    /// Return a single attribute value, or `None` if the attribute is absent.
183    ///
184    /// Issues one `Runtime.callFunctionOn` CDP call (`el.getAttribute(name)`).
185    ///
186    /// # Errors
187    ///
188    /// invalidated, or [`BrowserError::Timeout`] / [`BrowserError::CdpError`]
189    /// on transport-level failures.
190    pub async fn attr(&self, name: &str) -> Result<Option<String>> {
191        timeout(self.cdp_timeout, self.element.attribute(name))
192            .await
193            .map_err(|_| BrowserError::Timeout {
194                operation: "NodeHandle::attr".to_string(),
195                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
196            })?
197            .map_err(|e| self.cdp_err_or_stale(&e, "attr"))
198    }
199
200    /// Return all attributes as a `HashMap<name, value>` in a **single**
201    /// CDP round-trip.
202    ///
203    /// Uses `DOM.getAttributes` (via the chromiumoxide `attributes()` API)
204    /// which returns a flat `[name, value, name, value, …]` list from the node
205    /// description — no per-attribute calls are needed.
206    ///
207    /// # Errors
208    ///
209    /// invalidated.
210    pub async fn attr_map(&self) -> Result<HashMap<String, String>> {
211        let flat = timeout(self.cdp_timeout, self.element.attributes())
212            .await
213            .map_err(|_| BrowserError::Timeout {
214                operation: "NodeHandle::attr_map".to_string(),
215                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
216            })?
217            .map_err(|e| self.cdp_err_or_stale(&e, "attr_map"))?;
218
219        let mut map = HashMap::with_capacity(flat.len() / 2);
220        for pair in flat.chunks_exact(2) {
221            if let [name, value] = pair {
222                map.insert(name.clone(), value.clone());
223            }
224        }
225        Ok(map)
226    }
227
228    /// Return the element's `textContent` (all text inside, no markup).
229    ///
230    /// Reads the DOM `textContent` property via a single JS eval — this is the
231    /// raw text concatenation of all descendant text nodes, independent of
232    /// layout or visibility (unlike `innerText`).
233    ///
234    ///
235    /// # Errors
236    ///
237    /// invalidated.
238    pub async fn text_content(&self) -> Result<String> {
239        let returns = timeout(
240            self.cdp_timeout,
241            self.element
242                .call_js_fn(r"function() { return this.textContent ?? ''; }", true),
243        )
244        .await
245        .map_err(|_| BrowserError::Timeout {
246            operation: "NodeHandle::text_content".to_string(),
247            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
248        })?
249        .map_err(|e| self.cdp_err_or_stale(&e, "text_content"))?;
250
251        Ok(returns
252            .result
253            .value
254            .as_ref()
255            .and_then(|v| v.as_str())
256            .unwrap_or("")
257            .to_string())
258    }
259
260    /// Return the element's `innerHTML`.
261    ///
262    ///
263    /// # Errors
264    ///
265    /// invalidated.
266    pub async fn inner_html(&self) -> Result<String> {
267        timeout(self.cdp_timeout, self.element.inner_html())
268            .await
269            .map_err(|_| BrowserError::Timeout {
270                operation: "NodeHandle::inner_html".to_string(),
271                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
272            })?
273            .map_err(|e| self.cdp_err_or_stale(&e, "inner_html"))
274            .map(Option::unwrap_or_default)
275    }
276
277    /// Return the element's `outerHTML`.
278    ///
279    ///
280    /// # Errors
281    ///
282    /// invalidated.
283    pub async fn outer_html(&self) -> Result<String> {
284        timeout(self.cdp_timeout, self.element.outer_html())
285            .await
286            .map_err(|_| BrowserError::Timeout {
287                operation: "NodeHandle::outer_html".to_string(),
288                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
289            })?
290            .map_err(|e| self.cdp_err_or_stale(&e, "outer_html"))
291            .map(Option::unwrap_or_default)
292    }
293
294    ///
295    /// Executes a single `Runtime.callFunctionOn` JavaScript function that
296    /// walks `parentElement` and collects tag names — no repeated CDP calls.
297    ///
298    /// ```text
299    /// ["p", "article", "body", "html"]
300    /// ```
301    ///
302    /// # Errors
303    ///
304    /// invalidated, or [`BrowserError::ScriptExecutionFailed`] when CDP
305    pub async fn ancestors(&self) -> Result<Vec<String>> {
306        let returns = timeout(
307            self.cdp_timeout,
308            self.element.call_js_fn(
309                r"function() {
310                    const a = [];
311                    let n = this.parentElement;
312                    while (n) { a.push(n.tagName.toLowerCase()); n = n.parentElement; }
313                    return a;
314                }",
315                true,
316            ),
317        )
318        .await
319        .map_err(|_| BrowserError::Timeout {
320            operation: "NodeHandle::ancestors".to_string(),
321            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
322        })?
323        .map_err(|e| self.cdp_err_or_stale(&e, "ancestors"))?;
324
325        // With returnByValue=true and an array return, CDP delivers the value
326        // as a JSON array directly — no JSON.stringify/re-parse needed.
327        // A missing or wrong-type value indicates an unexpected CDP failure.
328        let arr = returns
329            .result
330            .value
331            .as_ref()
332            .and_then(|v| v.as_array())
333            .ok_or_else(|| BrowserError::ScriptExecutionFailed {
334                script: "NodeHandle::ancestors".to_string(),
335                reason: "CDP returned no value or a non-array value for ancestors()".to_string(),
336            })?;
337
338        arr.iter()
339            .map(|v| {
340                v.as_str().map(ToString::to_string).ok_or_else(|| {
341                    BrowserError::ScriptExecutionFailed {
342                        script: "NodeHandle::ancestors".to_string(),
343                        reason: format!("ancestor entry is not a string: {v}"),
344                    }
345                })
346            })
347            .collect()
348    }
349
350    ///
351    ///
352    ///
353    /// # Errors
354    ///
355    /// invalidated, or [`BrowserError::CdpError`] on transport failure.
356    pub async fn children_matching(&self, selector: &str) -> Result<Vec<Self>> {
357        let elements = timeout(self.cdp_timeout, self.element.find_elements(selector))
358            .await
359            .map_err(|_| BrowserError::Timeout {
360                operation: "NodeHandle::children_matching".to_string(),
361                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
362            })?
363            .map_err(|e| self.cdp_err_or_stale(&e, "children_matching"))?;
364
365        let selector_arc: Arc<str> = Arc::from(selector);
366        Ok(elements
367            .into_iter()
368            .map(|el| Self {
369                element: el,
370                selector: selector_arc.clone(),
371                cdp_timeout: self.cdp_timeout,
372                page: self.page.clone(),
373            })
374            .collect())
375    }
376
377    /// Return the immediate parent element, or `None` if this element has no
378    /// parent (i.e. it is the document root).
379    ///
380    /// Issues a single `Runtime.callFunctionOn` CDP call that temporarily tags
381    /// the parent element with a unique attribute, then resolves it via a
382    /// CSS attribute selector.
383    ///
384    /// # Errors
385    ///
386    /// Returns an error if the CDP call fails or the page handle is invalidated.
387    ///
388    /// # Example
389    ///
390    /// ```no_run
391    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
392    /// use std::time::Duration;
393    ///
394    /// # async fn run() -> stygian_browser::error::Result<()> {
395    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
396    /// let handle = pool.acquire().await?;
397    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
398    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
399    /// # let nodes = page.query_selector_all("a").await?;
400    /// if let Some(parent) = nodes[0].parent().await? {
401    ///     let html = parent.outer_html().await?;
402    ///     println!("parent: {}", &html[..html.len().min(80)]);
403    /// }
404    /// # Ok(())
405    /// # }
406    /// ```
407    pub async fn parent(&self) -> Result<Option<Self>> {
408        let attr = format!(
409            "data-stygian-t-{}",
410            ulid::Ulid::new().to_string().to_lowercase()
411        );
412        let js = format!(
413            "function() {{ \
414                var t = this.parentElement; \
415                if (!t) {{ return false; }} \
416                t.setAttribute('{attr}', '1'); \
417                return true; \
418            }}"
419        );
420        self.call_traversal(&js, &attr, "parent").await
421    }
422
423    /// Return the next element sibling, or `None` if this element is the last
424    /// child of its parent.
425    ///
426    /// Uses `nextElementSibling` (skips text/comment nodes).
427    ///
428    /// # Errors
429    ///
430    /// invalidated.
431    ///
432    /// # Example
433    ///
434    /// ```no_run
435    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
436    /// use std::time::Duration;
437    ///
438    /// # async fn run() -> stygian_browser::error::Result<()> {
439    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
440    /// let handle = pool.acquire().await?;
441    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
442    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
443    /// # let nodes = page.query_selector_all("a").await?;
444    /// if let Some(next) = nodes[0].next_sibling().await? {
445    ///     println!("next sibling: {}", next.text_content().await?);
446    /// }
447    /// # Ok(())
448    /// # }
449    /// ```
450    pub async fn next_sibling(&self) -> Result<Option<Self>> {
451        let attr = format!(
452            "data-stygian-t-{}",
453            ulid::Ulid::new().to_string().to_lowercase()
454        );
455        let js = format!(
456            "function() {{ \
457                var t = this.nextElementSibling; \
458                if (!t) {{ return false; }} \
459                t.setAttribute('{attr}', '1'); \
460                return true; \
461            }}"
462        );
463        self.call_traversal(&js, &attr, "next").await
464    }
465
466    /// Return the previous element sibling, or `None` if this element is the
467    /// first child of its parent.
468    ///
469    /// Uses `previousElementSibling` (skips text/comment nodes).
470    ///
471    /// # Errors
472    ///
473    /// invalidated.
474    ///
475    /// # Example
476    ///
477    /// ```no_run
478    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
479    /// use std::time::Duration;
480    ///
481    /// # async fn run() -> stygian_browser::error::Result<()> {
482    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
483    /// let handle = pool.acquire().await?;
484    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
485    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
486    /// # let nodes = page.query_selector_all("a").await?;
487    /// if let Some(prev) = nodes[1].previous_sibling().await? {
488    ///     println!("prev sibling: {}", prev.text_content().await?);
489    /// }
490    /// # Ok(())
491    /// # }
492    /// ```
493    pub async fn previous_sibling(&self) -> Result<Option<Self>> {
494        let attr = format!(
495            "data-stygian-t-{}",
496            ulid::Ulid::new().to_string().to_lowercase()
497        );
498        let js = format!(
499            "function() {{ \
500                var t = this.previousElementSibling; \
501                if (!t) {{ return false; }} \
502                t.setAttribute('{attr}', '1'); \
503                return true; \
504            }}"
505        );
506        self.call_traversal(&js, &attr, "prev").await
507    }
508
509    /// Shared traversal implementation used by [`parent`], [`next_sibling`],
510    /// and [`previous_sibling`].
511    ///
512    /// The caller provides a JS function that:
513    /// 1. Computes the traversal target (for example, the parent, next
514    ///    sibling, or previous sibling) and stores it in a local variable.
515    /// 2. If the target is non-null, sets a unique attribute (`attr_name`)
516    ///    on it and returns `true`.
517    /// 3. Returns `false` when the target is null (no such neighbour).
518    ///
519    /// This helper then resolves the tagged element from the document root,
520    /// removes the temporary attribute, and wraps the result in a
521    /// `NodeHandle`.
522    ///
523    /// [`parent`]: Self::parent
524    /// [`next_sibling`]: Self::next_sibling
525    /// [`previous_sibling`]: Self::previous_sibling
526    async fn call_traversal(
527        &self,
528        js_fn: &str,
529        attr_name: &str,
530        selector_suffix: &str,
531    ) -> Result<Option<Self>> {
532        // Step 1: Run the JS that tags the target element and reports null/non-null.
533        let op_tag = format!("NodeHandle::{selector_suffix}::tag");
534        let returns = timeout(self.cdp_timeout, self.element.call_js_fn(js_fn, false))
535            .await
536            .map_err(|_| BrowserError::Timeout {
537                operation: op_tag.clone(),
538                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
539            })?
540            .map_err(|e| self.cdp_err_or_stale(&e, selector_suffix))?;
541
542        // JS returns false → no such neighbour.
543        let has_target = returns
544            .result
545            .value
546            .as_ref()
547            .and_then(serde_json::Value::as_bool)
548            .unwrap_or(false);
549        if !has_target {
550            return Ok(None);
551        }
552
553        let css = format!("[{attr_name}]");
554        let op_resolve = format!("NodeHandle::{selector_suffix}::resolve");
555        let element = timeout(self.cdp_timeout, self.page.find_element(css))
556            .await
557            .map_err(|_| BrowserError::Timeout {
558                operation: op_resolve.clone(),
559                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
560            })?
561            .map_err(|e| BrowserError::CdpError {
562                operation: op_resolve,
563                message: format!("{e:?}"),
564            })?;
565
566        // is non-fatal — it leaves a harmless stale attribute in the DOM).
567        let cleanup = format!("function() {{ this.removeAttribute('{attr_name}'); }}");
568        let _ = element.call_js_fn(cleanup, false).await;
569
570        let new_selector: Arc<str> =
571            Arc::from(format!("{}::{selector_suffix}", self.selector).as_str());
572        Ok(Some(Self {
573            element,
574            selector: new_selector,
575            cdp_timeout: self.cdp_timeout,
576            page: self.page.clone(),
577        }))
578    }
579
580    /// (when the remote object reference has been invalidated) or
581    fn cdp_err_or_stale(
582        &self,
583        err: &chromiumoxide::error::CdpError,
584        operation: &str,
585    ) -> BrowserError {
586        let msg = format!("{err:?}");
587        if msg.contains("Cannot find object with id")
588            || msg.contains("context with specified id")
589            || msg.contains("Cannot find context")
590        {
591            BrowserError::StaleNode {
592                selector: self.selector.to_string(),
593            }
594        } else {
595            BrowserError::CdpError {
596                operation: operation.to_string(),
597                message: msg,
598            }
599        }
600    }
601}
602
603// ─── PageHandle ───────────────────────────────────────────────────────────────
604
605///
606///
607/// # Example
608///
609/// ```no_run
610/// use stygian_browser::{BrowserPool, BrowserConfig};
611/// use stygian_browser::page::WaitUntil;
612/// use std::time::Duration;
613///
614/// # async fn run() -> stygian_browser::error::Result<()> {
615/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
616/// let handle = pool.acquire().await?;
617/// let mut page = handle.browser().expect("valid browser").new_page().await?;
618/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
619/// let html = page.content().await?;
620/// drop(page); // closes the tab
621/// handle.release().await;
622/// # Ok(())
623/// # }
624/// ```
625pub struct PageHandle {
626    page: Page,
627    cdp_timeout: Duration,
628    /// HTTP status code of the most recent main-frame navigation, or `0` if not
629    last_status_code: Arc<AtomicU16>,
630    /// Background task processing `Fetch.requestPaused` events. Aborted and
631    /// replaced each time `set_resource_filter` is called.
632    resource_filter_task: Option<tokio::task::JoinHandle<()>>,
633}
634
635impl PageHandle {
636    /// Wrap a raw chromiumoxide [`Page`] in a handle.
637    pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
638        Self {
639            page,
640            cdp_timeout,
641            last_status_code: Arc::new(AtomicU16::new(0)),
642            resource_filter_task: None,
643        }
644    }
645
646    ///
647    /// # Errors
648    ///
649    /// the CDP call fails.
650    pub async fn navigate(
651        &mut self,
652        url: &str,
653        condition: WaitUntil,
654        nav_timeout: Duration,
655    ) -> Result<()> {
656        self.setup_status_capture().await;
657        timeout(
658            nav_timeout,
659            self.navigate_inner(url, condition, nav_timeout),
660        )
661        .await
662        .map_err(|_| BrowserError::NavigationFailed {
663            url: url.to_string(),
664            reason: format!("navigation timed out after {nav_timeout:?}"),
665        })?
666    }
667
668    /// Reset the last status code and wire up the `Network.responseReceived`
669    /// so that a missing network domain never blocks navigation.
670    async fn setup_status_capture(&self) {
671        use chromiumoxide::cdp::browser_protocol::network::{
672            EventResponseReceived, ResourceType as NetworkResourceType,
673        };
674        use futures::StreamExt;
675
676        // Reset so a stale code is not returned if the new navigation fails
677        self.last_status_code.store(0, Ordering::Release);
678
679        let page_for_listener = self.page.clone();
680        let status_capture = Arc::clone(&self.last_status_code);
681        match page_for_listener
682            .event_listener::<EventResponseReceived>()
683            .await
684        {
685            Ok(mut stream) => {
686                tokio::spawn(async move {
687                    while let Some(event) = stream.next().await {
688                        if event.r#type == NetworkResourceType::Document {
689                            let code = u16::try_from(event.response.status).unwrap_or(0);
690                            if code > 0 {
691                                status_capture.store(code, Ordering::Release);
692                            }
693                            break;
694                        }
695                    }
696                });
697            }
698            Err(e) => warn!("status-code capture unavailable: {e}"),
699        }
700    }
701
702    /// described in issue #7.
703    async fn navigate_inner(
704        &self,
705        url: &str,
706        condition: WaitUntil,
707        nav_timeout: Duration,
708    ) -> Result<()> {
709        use chromiumoxide::cdp::browser_protocol::page::{
710            EventDomContentEventFired, EventLoadEventFired,
711        };
712        use futures::StreamExt;
713
714        let url_owned = url.to_string();
715
716        let mut dom_events = match &condition {
717            WaitUntil::DomContentLoaded => Some(
718                self.page
719                    .event_listener::<EventDomContentEventFired>()
720                    .await
721                    .map_err(|e| BrowserError::NavigationFailed {
722                        url: url_owned.clone(),
723                        reason: format!("{e:?}"),
724                    })?,
725            ),
726            _ => None,
727        };
728
729        let mut load_events = match &condition {
730            WaitUntil::NetworkIdle => Some(
731                self.page
732                    .event_listener::<EventLoadEventFired>()
733                    .await
734                    .map_err(|e| BrowserError::NavigationFailed {
735                        url: url_owned.clone(),
736                        reason: e.to_string(),
737                    })?,
738            ),
739            _ => None,
740        };
741
742        let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
743            Some(self.subscribe_inflight_counter().await)
744        } else {
745            None
746        };
747
748        self.page
749            .goto(url)
750            .await
751            .map_err(|e| BrowserError::NavigationFailed {
752                url: url_owned.clone(),
753                reason: e.to_string(),
754            })?;
755
756        match &condition {
757            WaitUntil::DomContentLoaded => {
758                if let Some(ref mut events) = dom_events {
759                    let _ = events.next().await;
760                }
761            }
762            WaitUntil::NetworkIdle => {
763                if let Some(ref mut events) = load_events {
764                    let _ = events.next().await;
765                }
766                if let Some(ref counter) = inflight {
767                    Self::wait_network_idle(counter).await;
768                }
769            }
770            WaitUntil::Selector(css) => {
771                self.wait_for_selector(css, nav_timeout).await?;
772            }
773        }
774        Ok(())
775    }
776
777    /// Spawn three detached tasks that maintain a signed in-flight request
778    /// counter via `Network.requestWillBeSent` (+1) and
779    /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
780    async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
781        use std::sync::atomic::AtomicI32;
782
783        use chromiumoxide::cdp::browser_protocol::network::{
784            EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
785        };
786        use futures::StreamExt;
787
788        let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
789        let pairs: [(Arc<AtomicI32>, i32); 3] = [
790            (Arc::clone(&counter), 1),
791            (Arc::clone(&counter), -1),
792            (Arc::clone(&counter), -1),
793        ];
794        let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
795
796        macro_rules! spawn_tracker {
797            ($page:expr, $event:ty, $c:expr, $delta:expr) => {
798                match $page.event_listener::<$event>().await {
799                    Ok(mut s) => {
800                        let c = $c;
801                        let d = $delta;
802                        tokio::spawn(async move {
803                            while s.next().await.is_some() {
804                                c.fetch_add(d, Ordering::Relaxed);
805                            }
806                        });
807                    }
808                    Err(e) => warn!("network-idle tracker unavailable: {e}"),
809                }
810            };
811        }
812
813        let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
814        spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
815        spawn_tracker!(p2, EventLoadingFinished, c2, d2);
816        spawn_tracker!(p3, EventLoadingFailed, c3, d3);
817
818        counter
819    }
820
821    async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
822        const IDLE_THRESHOLD: i32 = 2;
823        const SETTLE: Duration = Duration::from_millis(500);
824        loop {
825            if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
826                tokio::time::sleep(SETTLE).await;
827                if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
828                    break;
829                }
830            } else {
831                tokio::time::sleep(Duration::from_millis(50)).await;
832            }
833        }
834    }
835
836    ///
837    /// # Errors
838    ///
839    /// within the given timeout.
840    pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
841        let selector_owned = selector.to_string();
842        let poll = async {
843            loop {
844                if self.page.find_element(selector_owned.clone()).await.is_ok() {
845                    return Ok(());
846                }
847                tokio::time::sleep(Duration::from_millis(100)).await;
848            }
849        };
850
851        timeout(wait_timeout, poll)
852            .await
853            .map_err(|_| BrowserError::NavigationFailed {
854                url: String::new(),
855                reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
856            })?
857    }
858
859    ///
860    /// Enables `Fetch` interception and spawns a background task that continues
861    /// allowed requests and fails blocked ones with `BlockedByClient`. Any
862    /// previously set filter task is cancelled first.
863    ///
864    /// # Errors
865    ///
866    pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
867        use chromiumoxide::cdp::browser_protocol::fetch::{
868            ContinueRequestParams, EnableParams, EventRequestPaused, FailRequestParams,
869            RequestPattern,
870        };
871        use chromiumoxide::cdp::browser_protocol::network::ErrorReason;
872        use futures::StreamExt as _;
873
874        if filter.is_empty() {
875            return Ok(());
876        }
877
878        // Cancel any previously running filter task.
879        if let Some(task) = self.resource_filter_task.take() {
880            task.abort();
881        }
882
883        let pattern = RequestPattern::builder().url_pattern("*").build();
884        let params = EnableParams::builder()
885            .patterns(vec![pattern])
886            .handle_auth_requests(false)
887            .build();
888
889        timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
890            .await
891            .map_err(|_| BrowserError::Timeout {
892                operation: "Fetch.enable".to_string(),
893                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
894            })?
895            .map_err(|e| BrowserError::CdpError {
896                operation: "Fetch.enable".to_string(),
897                message: e.to_string(),
898            })?;
899
900        // is never blocked. Without this handler Chrome holds every intercepted
901        // request indefinitely and the page hangs.
902        let mut events = self
903            .page
904            .event_listener::<EventRequestPaused>()
905            .await
906            .map_err(|e| BrowserError::CdpError {
907                operation: "Fetch.requestPaused subscribe".to_string(),
908                message: e.to_string(),
909            })?;
910
911        let page = self.page.clone();
912        debug!("Resource filter active: {:?}", filter);
913        let task = tokio::spawn(async move {
914            while let Some(event) = events.next().await {
915                let request_id = event.request_id.clone();
916                if filter.should_block(event.resource_type.as_ref()) {
917                    let params = FailRequestParams::new(request_id, ErrorReason::BlockedByClient);
918                    let _ = page.execute(params).await;
919                } else {
920                    let _ = page.execute(ContinueRequestParams::new(request_id)).await;
921                }
922            }
923        });
924
925        self.resource_filter_task = Some(task);
926        Ok(())
927    }
928
929    /// Return the current page URL (post-navigation, post-redirect).
930    ///
931    /// internally by [`save_cookies`](Self::save_cookies); no extra network
932    /// request is made.  Returns an empty string if the URL is not yet set
933    ///
934    /// # Errors
935    ///
936    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
937    ///
938    /// # Example
939    ///
940    /// ```no_run
941    /// use stygian_browser::{BrowserPool, BrowserConfig};
942    /// use stygian_browser::page::WaitUntil;
943    /// use std::time::Duration;
944    ///
945    /// # async fn run() -> stygian_browser::error::Result<()> {
946    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
947    /// let handle = pool.acquire().await?;
948    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
949    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
950    /// let url = page.url().await?;
951    /// println!("Final URL after redirects: {url}");
952    /// # Ok(())
953    /// # }
954    /// ```
955    pub async fn url(&self) -> Result<String> {
956        timeout(self.cdp_timeout, self.page.url())
957            .await
958            .map_err(|_| BrowserError::Timeout {
959                operation: "page.url".to_string(),
960                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
961            })?
962            .map_err(|e| BrowserError::CdpError {
963                operation: "page.url".to_string(),
964                message: e.to_string(),
965            })
966            .map(Option::unwrap_or_default)
967    }
968
969    /// Return the HTTP status code of the most recent main-frame navigation.
970    ///
971    /// The status is captured from the `Network.responseReceived` CDP event
972    /// wired up inside [`navigate`](Self::navigate), so it reflects the
973    /// *final* response after any server-side redirects.
974    ///
975    /// navigations, when [`navigate`](Self::navigate) has not yet been called,
976    /// or if the network event subscription failed.
977    ///
978    /// # Errors
979    ///
980    ///
981    /// # Example
982    ///
983    /// ```no_run
984    /// use stygian_browser::{BrowserPool, BrowserConfig};
985    /// use stygian_browser::page::WaitUntil;
986    /// use std::time::Duration;
987    ///
988    /// # async fn run() -> stygian_browser::error::Result<()> {
989    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
990    /// let handle = pool.acquire().await?;
991    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
992    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
993    /// if let Some(code) = page.status_code()? {
994    ///     println!("HTTP {code}");
995    /// }
996    /// # Ok(())
997    /// # }
998    /// ```
999    pub fn status_code(&self) -> Result<Option<u16>> {
1000        let code = self.last_status_code.load(Ordering::Acquire);
1001        Ok(if code == 0 { None } else { Some(code) })
1002    }
1003
1004    /// Return the page's `<title>` text.
1005    ///
1006    /// # Errors
1007    ///
1008    pub async fn title(&self) -> Result<String> {
1009        timeout(self.cdp_timeout, self.page.get_title())
1010            .await
1011            .map_err(|_| BrowserError::Timeout {
1012                operation: "get_title".to_string(),
1013                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1014            })?
1015            .map_err(|e| BrowserError::ScriptExecutionFailed {
1016                script: "document.title".to_string(),
1017                reason: e.to_string(),
1018            })
1019            .map(Option::unwrap_or_default)
1020    }
1021
1022    /// Return the page's full outer HTML.
1023    ///
1024    /// # Errors
1025    ///
1026    pub async fn content(&self) -> Result<String> {
1027        timeout(self.cdp_timeout, self.page.content())
1028            .await
1029            .map_err(|_| BrowserError::Timeout {
1030                operation: "page.content".to_string(),
1031                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1032            })?
1033            .map_err(|e| BrowserError::ScriptExecutionFailed {
1034                script: "document.documentElement.outerHTML".to_string(),
1035                reason: e.to_string(),
1036            })
1037    }
1038
1039    /// lightweight [`NodeHandle`]s backed by CDP `RemoteObjectId`s.
1040    ///
1041    /// No HTML serialisation occurs — the browser's in-memory DOM is queried
1042    /// directly over the CDP connection, eliminating the `page.content()` +
1043    /// `scraper::Html::parse_document` round-trip.
1044    ///
1045    ///
1046    /// # Errors
1047    ///
1048    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
1049    ///
1050    /// # Example
1051    ///
1052    /// ```no_run
1053    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1054    /// use std::time::Duration;
1055    ///
1056    /// # async fn run() -> stygian_browser::error::Result<()> {
1057    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1058    /// let handle = pool.acquire().await?;
1059    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1060    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
1061    /// # let nodes = page.query_selector_all("div[data-ux]").await?;
1062    /// # for node in &nodes {
1063    ///     let ux_type = node.attr("data-ux").await?;
1064    ///     let text    = node.text_content().await?;
1065    ///     println!("{ux_type:?}: {text}");
1066    /// # }
1067    /// # Ok(())
1068    /// # }
1069    /// ```
1070    pub async fn query_selector_all(&self, selector: &str) -> Result<Vec<NodeHandle>> {
1071        let elements = timeout(self.cdp_timeout, self.page.find_elements(selector))
1072            .await
1073            .map_err(|_| BrowserError::Timeout {
1074                operation: "PageHandle::query_selector_all".to_string(),
1075                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1076            })?
1077            .map_err(|e| BrowserError::CdpError {
1078                operation: "PageHandle::query_selector_all".to_string(),
1079                message: e.to_string(),
1080            })?;
1081
1082        let selector_arc: Arc<str> = Arc::from(selector);
1083        Ok(elements
1084            .into_iter()
1085            .map(|el| NodeHandle {
1086                element: el,
1087                selector: selector_arc.clone(),
1088                cdp_timeout: self.cdp_timeout,
1089                page: self.page.clone(),
1090            })
1091            .collect())
1092    }
1093
1094    /// Evaluate arbitrary JavaScript and return the result as `T`.
1095    ///
1096    /// # Errors
1097    ///
1098    /// deserialization error.
1099    pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
1100        let script_owned = script.to_string();
1101        timeout(self.cdp_timeout, self.page.evaluate(script))
1102            .await
1103            .map_err(|_| BrowserError::Timeout {
1104                operation: "page.evaluate".to_string(),
1105                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1106            })?
1107            .map_err(|e| BrowserError::ScriptExecutionFailed {
1108                script: script_owned.clone(),
1109                reason: e.to_string(),
1110            })?
1111            .into_value::<T>()
1112            .map_err(|e| BrowserError::ScriptExecutionFailed {
1113                script: script_owned,
1114                reason: e.to_string(),
1115            })
1116    }
1117
1118    ///
1119    /// # Errors
1120    ///
1121    pub async fn save_cookies(
1122        &self,
1123    ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
1124        use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
1125
1126        let url = self
1127            .page
1128            .url()
1129            .await
1130            .map_err(|e| BrowserError::CdpError {
1131                operation: "page.url".to_string(),
1132                message: e.to_string(),
1133            })?
1134            .unwrap_or_default();
1135
1136        timeout(
1137            self.cdp_timeout,
1138            self.page
1139                .execute(GetCookiesParams::builder().urls(vec![url]).build()),
1140        )
1141        .await
1142        .map_err(|_| BrowserError::Timeout {
1143            operation: "Network.getCookies".to_string(),
1144            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1145        })?
1146        .map_err(|e| BrowserError::CdpError {
1147            operation: "Network.getCookies".to_string(),
1148            message: e.to_string(),
1149        })
1150        .map(|r| r.cookies.clone())
1151    }
1152
1153    ///
1154    /// [`SessionSnapshot`][crate::session::SessionSnapshot] and without
1155    /// requiring a direct `chromiumoxide` dependency in calling code.
1156    ///
1157    /// Individual cookie failures are logged as warnings and do not abort the
1158    /// remaining cookies.
1159    ///
1160    /// # Errors
1161    ///
1162    /// call exceeds `cdp_timeout`.
1163    ///
1164    /// # Example
1165    ///
1166    /// ```no_run
1167    /// use stygian_browser::{BrowserPool, BrowserConfig};
1168    /// use stygian_browser::session::SessionCookie;
1169    /// use std::time::Duration;
1170    ///
1171    /// # async fn run() -> stygian_browser::error::Result<()> {
1172    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1173    /// let handle = pool.acquire().await?;
1174    /// let page = handle.browser().expect("valid browser").new_page().await?;
1175    /// let cookies = vec![SessionCookie {
1176    ///     name: "session".to_string(),
1177    ///     value: "abc123".to_string(),
1178    ///     domain: ".example.com".to_string(),
1179    ///     path: "/".to_string(),
1180    ///     expires: -1.0,
1181    ///     http_only: true,
1182    ///     secure: true,
1183    ///     same_site: "Lax".to_string(),
1184    /// }];
1185    /// page.inject_cookies(&cookies).await?;
1186    /// # Ok(())
1187    /// # }
1188    /// ```
1189    pub async fn inject_cookies(&self, cookies: &[crate::session::SessionCookie]) -> Result<()> {
1190        use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
1191
1192        for cookie in cookies {
1193            let params = match SetCookieParams::builder()
1194                .name(cookie.name.clone())
1195                .value(cookie.value.clone())
1196                .domain(cookie.domain.clone())
1197                .path(cookie.path.clone())
1198                .http_only(cookie.http_only)
1199                .secure(cookie.secure)
1200                .build()
1201            {
1202                Ok(p) => p,
1203                Err(e) => {
1204                    warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
1205                    continue;
1206                }
1207            };
1208
1209            match timeout(self.cdp_timeout, self.page.execute(params)).await {
1210                Err(_) => {
1211                    warn!(
1212                        cookie = %cookie.name,
1213                        timeout_ms = self.cdp_timeout.as_millis(),
1214                        "Timed out injecting cookie"
1215                    );
1216                }
1217                Ok(Err(e)) => {
1218                    warn!(cookie = %cookie.name, error = %e, "Failed to inject cookie");
1219                }
1220                Ok(Ok(_)) => {}
1221            }
1222        }
1223
1224        debug!(count = cookies.len(), "Cookies injected");
1225        Ok(())
1226    }
1227
1228    /// Capture a screenshot of the current page as PNG bytes.
1229    ///
1230    /// them in-memory.
1231    ///
1232    /// # Errors
1233    ///
1234    /// command fails, or [`BrowserError::Timeout`] if it exceeds
1235    /// `cdp_timeout`.
1236    ///
1237    /// # Example
1238    ///
1239    /// ```no_run
1240    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1241    /// use std::{time::Duration, fs};
1242    ///
1243    /// # async fn run() -> stygian_browser::error::Result<()> {
1244    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1245    /// let handle = pool.acquire().await?;
1246    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1247    /// let png = page.screenshot().await?;
1248    /// fs::write("screenshot.png", &png).unwrap();
1249    /// # Ok(())
1250    /// # }
1251    /// ```
1252    pub async fn screenshot(&self) -> Result<Vec<u8>> {
1253        use chromiumoxide::page::ScreenshotParams;
1254
1255        let params = ScreenshotParams::builder().full_page(true).build();
1256
1257        timeout(self.cdp_timeout, self.page.screenshot(params))
1258            .await
1259            .map_err(|_| BrowserError::Timeout {
1260                operation: "Page.captureScreenshot".to_string(),
1261                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1262            })?
1263            .map_err(|e| BrowserError::CdpError {
1264                operation: "Page.captureScreenshot".to_string(),
1265                message: e.to_string(),
1266            })
1267    }
1268
1269    /// Borrow the underlying chromiumoxide [`Page`].
1270    pub const fn inner(&self) -> &Page {
1271        &self.page
1272    }
1273
1274    /// Close this page (tab).
1275    ///
1276    pub async fn close(self) -> Result<()> {
1277        timeout(Duration::from_secs(5), self.page.clone().close())
1278            .await
1279            .map_err(|_| BrowserError::Timeout {
1280                operation: "page.close".to_string(),
1281                duration_ms: 5000,
1282            })?
1283            .map_err(|e| BrowserError::CdpError {
1284                operation: "page.close".to_string(),
1285                message: e.to_string(),
1286            })
1287    }
1288}
1289
1290// ─── Stealth diagnostics ──────────────────────────────────────────────────────
1291
1292#[cfg(feature = "stealth")]
1293impl PageHandle {
1294    /// Run all built-in stealth detection checks against the current page.
1295    ///
1296    /// Iterates [`crate::diagnostic::all_checks`], evaluates each check's
1297    /// JavaScript via CDP `Runtime.evaluate`, and returns an aggregate
1298    /// [`crate::diagnostic::DiagnosticReport`].
1299    ///
1300    /// recorded as failing checks and do **not** abort the whole run.
1301    ///
1302    /// # Errors
1303    ///
1304    /// Individual check failures are captured in the report.
1305    ///
1306    /// # Example
1307    ///
1308    /// ```no_run
1309    /// # async fn run() -> stygian_browser::error::Result<()> {
1310    /// use stygian_browser::{BrowserPool, BrowserConfig};
1311    /// use stygian_browser::page::WaitUntil;
1312    /// use std::time::Duration;
1313    ///
1314    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1315    /// let handle = pool.acquire().await?;
1316    /// let browser = handle.browser().expect("valid browser");
1317    /// let mut page = browser.new_page().await?;
1318    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(10)).await?;
1319    ///
1320    /// let report = page.verify_stealth().await?;
1321    /// println!("Stealth: {}/{} checks passed", report.passed_count, report.checks.len());
1322    /// # for failure in report.failures() {
1323    ///     eprintln!("  FAIL  {}: {}", failure.description, failure.details);
1324    /// # }
1325    /// # Ok(())
1326    /// # }
1327    /// ```
1328    pub async fn verify_stealth(&self) -> Result<crate::diagnostic::DiagnosticReport> {
1329        use crate::diagnostic::{CheckResult, DiagnosticReport, all_checks, all_limitation_probes};
1330
1331        let mut results: Vec<CheckResult> = Vec::new();
1332        let mut known_limitations = Vec::new();
1333
1334        for check in all_checks() {
1335            let result = match self.eval::<String>(check.script).await {
1336                Ok(json) => check.parse_output(&json),
1337                Err(e) => {
1338                    tracing::warn!(
1339                        check = ?check.id,
1340                        error = %e,
1341                        "stealth check script failed during evaluation"
1342                    );
1343                    CheckResult {
1344                        id: check.id,
1345                        description: check.description.to_string(),
1346                        passed: false,
1347                        details: format!("script error: {e}"),
1348                    }
1349                }
1350            };
1351            tracing::debug!(
1352                check = ?result.id,
1353                passed = result.passed,
1354                details = %result.details,
1355                "stealth check result"
1356            );
1357            results.push(result);
1358        }
1359
1360        for probe in all_limitation_probes() {
1361            let limitation = match self.eval::<String>(probe.script).await {
1362                Ok(json) => probe.parse_output(&json),
1363                Err(error) => Some(crate::diagnostic::KnownLimitation {
1364                    id: probe.id,
1365                    description: probe.description.to_string(),
1366                    details: format!("script error: {error}"),
1367                }),
1368            };
1369            if let Some(limitation) = limitation {
1370                tracing::debug!(
1371                    limitation = ?limitation.id,
1372                    details = %limitation.details,
1373                    "stealth limitation observed"
1374                );
1375                known_limitations.push(limitation);
1376            }
1377        }
1378
1379        Ok(DiagnosticReport::new(results).with_known_limitations(known_limitations))
1380    }
1381
1382    /// Run stealth checks and attach transport diagnostics (JA3/JA4/HTTP3).
1383    ///
1384    pub async fn verify_stealth_with_transport(
1385        &self,
1386        observed: Option<crate::diagnostic::TransportObservations>,
1387    ) -> Result<crate::diagnostic::DiagnosticReport> {
1388        let report = self.verify_stealth().await?;
1389
1390        let user_agent = match self.eval::<String>("navigator.userAgent").await {
1391            Ok(ua) => ua,
1392            Err(e) => {
1393                tracing::warn!(error = %e, "failed to read navigator.userAgent for transport diagnostics");
1394                String::new()
1395            }
1396        };
1397
1398        let transport = crate::diagnostic::TransportDiagnostic::from_user_agent_and_observations(
1399            &user_agent,
1400            observed.as_ref(),
1401        );
1402
1403        Ok(report.with_transport(transport))
1404    }
1405}
1406
1407// ─── extract feature ─────────────────────────────────────────────────────────
1408
1409#[cfg(feature = "extract")]
1410impl PageHandle {
1411    ///
1412    ///
1413    /// All per-node extractions are driven concurrently via
1414    /// [`futures::future::try_join_all`].
1415    ///
1416    /// # Errors
1417    ///
1418    /// fails, or [`BrowserError::ExtractionFailed`] if any field extraction
1419    /// fails.
1420    ///
1421    /// # Example
1422    ///
1423    /// ```ignore
1424    /// use stygian_browser::extract::Extract;
1425    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1426    /// use std::time::Duration;
1427    ///
1428    /// #[derive(Extract)]
1429    /// struct Link {
1430    ///     href: Option<String>,
1431    /// }
1432    ///
1433    /// # async fn run() -> stygian_browser::error::Result<()> {
1434    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1435    /// let handle = pool.acquire().await?;
1436    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1437    /// page.navigate(
1438    ///     "https://example.com",
1439    ///     WaitUntil::DomContentLoaded,
1440    ///     Duration::from_secs(30),
1441    /// ).await?;
1442    /// let links: Vec<Link> = page.extract_all::<Link>("nav li").await?;
1443    /// # Ok(())
1444    /// # }
1445    /// ```
1446    pub async fn extract_all<T>(&self, selector: &str) -> Result<Vec<T>>
1447    where
1448        T: crate::extract::Extractable,
1449    {
1450        use futures::future::try_join_all;
1451
1452        let nodes = self.query_selector_all(selector).await?;
1453        try_join_all(nodes.iter().map(|n| T::extract_from(n)))
1454            .await
1455            .map_err(BrowserError::ExtractionFailed)
1456    }
1457
1458    /// Try each selector in `selectors` in order and return the extracted
1459    /// results from the **first** selector that matches at least one node.
1460    ///
1461    /// This is useful when a page may use different markup across versions or
1462    /// A/B variants — supply the preferred selector first and progressively
1463    /// wider fallbacks afterwards.
1464    ///
1465    /// Returns an empty `Vec` only when *all* selectors match zero nodes
1466    /// (i.e. the element is genuinely absent from the page).  A non-empty
1467    /// intermediate selector result that then fails during extraction **will**
1468    /// return an error.
1469    ///
1470    /// # Errors
1471    ///
1472    /// Returns [`BrowserError::CdpError`] if the selector query fails, or
1473    /// [`BrowserError::ExtractionFailed`] if a matched node fails extraction.
1474    ///
1475    /// # Example
1476    ///
1477    /// ```ignore
1478    /// use stygian_browser::extract::Extract;
1479    ///
1480    /// #[derive(Extract)]
1481    /// struct Headline { title: String }
1482    ///
1483    /// # async fn run(page: &stygian_browser::PageHandle) -> stygian_browser::error::Result<()> {
1484    /// // Try modern selector first, fall back to legacy markup.
1485    /// let items = page
1486    ///     .extract_all_with_fallback::<Headline>(&["h2.headline", "h2.title", "h2"])
1487    ///     .await?;
1488    /// # Ok(())
1489    /// # }
1490    /// ```
1491    pub async fn extract_all_with_fallback<T>(&self, selectors: &[&str]) -> Result<Vec<T>>
1492    where
1493        T: crate::extract::Extractable,
1494    {
1495        use futures::future::try_join_all;
1496
1497        for &selector in selectors {
1498            let nodes = self.query_selector_all(selector).await?;
1499            if nodes.is_empty() {
1500                continue;
1501            }
1502            return try_join_all(nodes.iter().map(|n| T::extract_from(n)))
1503                .await
1504                .map_err(BrowserError::ExtractionFailed);
1505        }
1506
1507        Ok(vec![])
1508    }
1509
1510    /// Extract from every node matching `selector`, **skipping** nodes where
1511    /// a required field is absent (i.e. [`ExtractionError::Missing`]).
1512    ///
1513    /// Unlike [`extract_all`], this method is lenient about structural
1514    /// mismatches: nodes that fail with [`ExtractionError::Missing`] are
1515    /// silently dropped from the result set.  All other extraction errors
1516    /// (CDP failures, stale nodes, nested errors) still propagate as hard
1517    /// failures.
1518    ///
1519    /// This is useful when scraping heterogeneous lists where some items
1520    /// lack an optional field that your struct treats as required.
1521    ///
1522    /// [`extract_all`]: Self::extract_all
1523    /// [`ExtractionError::Missing`]: crate::extract::ExtractionError::Missing
1524    ///
1525    /// # Errors
1526    ///
1527    /// Returns [`BrowserError::CdpError`] if the selector query fails, or
1528    /// [`BrowserError::ExtractionFailed`] for non-`Missing` extraction errors.
1529    ///
1530    /// # Example
1531    ///
1532    /// ```ignore
1533    /// use stygian_browser::extract::Extract;
1534    ///
1535    /// #[derive(Extract)]
1536    /// struct Price { amount: String }
1537    ///
1538    /// # async fn run(page: &stygian_browser::PageHandle) -> stygian_browser::error::Result<()> {
1539    /// // Products without a price tag are silently skipped.
1540    /// let prices = page.extract_resilient::<Price>(".product").await?;
1541    /// # Ok(())
1542    /// # }
1543    /// ```
1544    pub async fn extract_resilient<T>(&self, selector: &str) -> Result<Vec<T>>
1545    where
1546        T: crate::extract::Extractable,
1547    {
1548        use crate::extract::ExtractionError;
1549
1550        let nodes = self.query_selector_all(selector).await?;
1551        let mut results = Vec::with_capacity(nodes.len());
1552
1553        for node in &nodes {
1554            match T::extract_from(node).await {
1555                Ok(item) => results.push(item),
1556                Err(ExtractionError::Missing { .. }) => {
1557                    tracing::debug!(
1558                        selector,
1559                        "extract_resilient: skipping node with missing required field"
1560                    );
1561                }
1562                Err(e) => return Err(BrowserError::ExtractionFailed(e)),
1563            }
1564        }
1565
1566        Ok(results)
1567    }
1568}
1569
1570// ─── similarity feature ──────────────────────────────────────────────────────
1571
#[cfg(feature = "similarity")]
impl NodeHandle {
    /// Compute the [`crate::similarity::ElementFingerprint`] of this
    /// node.
    ///
    /// Issues a single `Runtime.callFunctionOn` JS eval that extracts the tag,
    /// class list, attribute names, and body-depth in one round-trip. The
    /// script sorts classes and attribute names, leaves `class` and `id` out
    /// of the attribute names, and returns everything as one JSON string that
    /// is then deserialised on the Rust side.
    ///
    /// # Errors
    ///
    /// Returns [`BrowserError::Timeout`] if the call exceeds `cdp_timeout`,
    /// the error produced by `cdp_err_or_stale` if the call itself fails
    /// (presumably a stale-node error when the remote object has been
    /// invalidated — see that helper), or
    /// [`BrowserError::ScriptExecutionFailed`] if the script
    /// produces unexpected output.
    pub async fn fingerprint(&self) -> Result<crate::similarity::ElementFingerprint> {
        const JS: &str = r"function() {
    var el = this;
    var tag = el.tagName.toLowerCase();
    var classes = Array.prototype.slice.call(el.classList).sort();
    var attrNames = Array.prototype.slice.call(el.attributes)
        .map(function(a) { return a.name; })
        .filter(function(n) { return n !== 'class' && n !== 'id'; })
        .sort();
    var depth = 0;
    var n = el.parentElement;
    while (n && n.tagName.toLowerCase() !== 'body') { depth++; n = n.parentElement; }
    return JSON.stringify({ tag: tag, classes: classes, attrNames: attrNames, depth: depth });
}";
        const OPERATION: &str = "NodeHandle::fingerprint";

        let returns = timeout(self.cdp_timeout, self.element.call_js_fn(JS, true))
            .await
            .map_err(|_| BrowserError::Timeout {
                operation: OPERATION.to_string(),
                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
            })?
            .map_err(|e| self.cdp_err_or_stale(&e, "fingerprint"))?;

        // The script returns its payload as a JSON *string*; anything else
        // (or no value at all) is treated as a script failure.
        let Some(payload) = returns.result.value.as_ref().and_then(|v| v.as_str()) else {
            return Err(BrowserError::ScriptExecutionFailed {
                script: OPERATION.to_string(),
                reason: "CDP returned no string value from fingerprint script".to_string(),
            });
        };

        serde_json::from_str::<crate::similarity::ElementFingerprint>(payload).map_err(|e| {
            BrowserError::ScriptExecutionFailed {
                script: OPERATION.to_string(),
                reason: format!("failed to deserialise fingerprint JSON: {e}"),
            }
        })
    }
}
1624
#[cfg(feature = "similarity")]
impl PageHandle {
    /// Find elements on the page that are structurally similar to
    /// `reference`, scored by [`crate::similarity::jaccard_weighted`].
    ///
    /// Fingerprints `reference` first (see
    /// [`NodeHandle::fingerprint`]), then fingerprints every candidate returned
    /// by `query_selector_all("*")` and keeps those whose
    /// [`crate::similarity::jaccard_weighted`] score is at least
    /// `config.threshold`.  Results are ordered by score descending and, when
    /// `config.max_results` is greater than zero, truncated to that many
    /// entries.
    ///
    /// Candidates whose fingerprint cannot be computed (for any reason,
    /// including stale or detached nodes) are silently skipped.
    ///
    /// # Errors
    ///
    /// Returns the error from fingerprinting `reference` (for example
    /// [`BrowserError::ScriptExecutionFailed`] or [`BrowserError::Timeout`]),
    /// or the error from the candidate query. Failures on individual
    /// candidates are not reported.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
    /// use stygian_browser::similarity::SimilarityConfig;
    /// use std::time::Duration;
    ///
    /// # async fn run() -> stygian_browser::error::Result<()> {
    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
    /// let handle = pool.acquire().await?;
    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
    ///
    /// let nodes = page.query_selector_all("h1").await?;
    /// let reference = nodes.into_iter().next().ok_or(stygian_browser::error::BrowserError::StaleNode { selector: "h1".to_string() })?;
    /// let similar = page.find_similar(&reference, SimilarityConfig::default()).await?;
    /// for m in &similar {
    ///     println!("score={:.2}", m.score);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub async fn find_similar(
        &self,
        reference: &NodeHandle,
        config: crate::similarity::SimilarityConfig,
    ) -> Result<Vec<crate::similarity::SimilarMatch>> {
        use crate::similarity::{SimilarMatch, jaccard_weighted};
        use std::cmp::Ordering as CmpOrdering;

        let ref_fp = reference.fingerprint().await?;

        let mut matches: Vec<SimilarMatch> = Vec::new();
        for node in self.query_selector_all("*").await? {
            // Stale / detached nodes are silently skipped.
            let Ok(cand_fp) = node.fingerprint().await else {
                continue;
            };

            let score = jaccard_weighted(&ref_fp, &cand_fp);
            if score >= config.threshold {
                matches.push(SimilarMatch { node, score });
            }
        }

        // Highest score first; incomparable scores (NaN) are treated as equal.
        matches.sort_by(|lhs, rhs| {
            rhs.score
                .partial_cmp(&lhs.score)
                .unwrap_or(CmpOrdering::Equal)
        });

        // `max_results == 0` means "no limit".
        if config.max_results > 0 {
            matches.truncate(config.max_results);
        }

        Ok(matches)
    }
}
1693
1694impl Drop for PageHandle {
1695    fn drop(&mut self) {
1696        warn!("PageHandle dropped without explicit close(); spawning cleanup task");
1697        // chromiumoxide Page does not implement close on Drop, so we spawn
1698        // swap it out. We clone the Page handle (it's Arc-backed internally).
1699        let page = self.page.clone();
1700        tokio::spawn(async move {
1701            let _ = page.close().await;
1702        });
1703    }
1704}
1705
1706// ─── Session warmup & refresh ─────────────────────────────────────────────────
1707
/// Simplified, JSON-serializable wait strategy used in [`WarmupOptions`] and
/// [`RefreshOptions`].
///
/// This is a serialization-friendly analogue of [`WaitUntil`].  Use
/// [`WarmupWait::into_wait_until`] to convert before calling
/// [`PageHandle::navigate`].
///
/// Variants are (de)serialized in `snake_case`, i.e. `"dom_content_loaded"`
/// and `"network_idle"`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum WarmupWait {
    /// Wait until the HTML is fully parsed (`DOMContentLoaded`).  This is the
    /// default and works for most pages.
    #[default]
    DomContentLoaded,
    /// Wait until there are no more than two in-flight network requests for at
    /// least 500 ms after navigation.
    NetworkIdle,
}
1725
1726impl WarmupWait {
1727    /// Convert into the lower-level [`WaitUntil`] enum.
1728    #[must_use]
1729    pub const fn into_wait_until(self) -> WaitUntil {
1730        match self {
1731            Self::DomContentLoaded => WaitUntil::DomContentLoaded,
1732            Self::NetworkIdle => WaitUntil::NetworkIdle,
1733        }
1734    }
1735}
1736
/// Options for [`PageHandle::warmup`].
///
/// When deserializing, only `url` is required; every other field falls back
/// to its documented default.
///
/// # Example
///
/// ```
/// use stygian_browser::page::{WarmupOptions, WarmupWait};
///
/// let opts = WarmupOptions {
///     url: "https://example.com".to_string(),
///     wait: WarmupWait::DomContentLoaded,
///     timeout_ms: 30_000,
///     stabilize_ms: 500,
/// };
/// assert_eq!(opts.timeout_ms, 30_000);
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WarmupOptions {
    /// The URL to navigate to during warmup.
    pub url: String,
    /// Wait strategy applied after the navigation commit (default:
    /// `DomContentLoaded`).
    #[serde(default)]
    pub wait: WarmupWait,
    /// Navigation timeout in milliseconds.  Default: `30 000`
    /// (see [`WarmupOptions::default_timeout_ms`]).
    #[serde(default = "WarmupOptions::default_timeout_ms")]
    pub timeout_ms: u64,
    /// Additional pause after navigation to let dynamic resources (XHR,
    /// lazy-loaded images) settle, in milliseconds.  `0` disables the
    /// stabilization step (default).
    #[serde(default)]
    pub stabilize_ms: u64,
}
1769
1770impl WarmupOptions {
1771    /// Returns the default navigation timeout (30 000 ms).
1772    #[must_use]
1773    pub const fn default_timeout_ms() -> u64 {
1774        30_000
1775    }
1776}
1777
1778impl Default for WarmupOptions {
1779    fn default() -> Self {
1780        Self {
1781            url: String::new(),
1782            wait: WarmupWait::DomContentLoaded,
1783            timeout_ms: Self::default_timeout_ms(),
1784            stabilize_ms: 0,
1785        }
1786    }
1787}
1788
/// Diagnostic report produced by [`PageHandle::warmup`].
///
/// # Example
///
/// ```
/// use stygian_browser::page::WarmupReport;
/// let report = WarmupReport {
///     url: "https://example.com".to_string(),
///     elapsed_ms: 250,
///     status_code: Some(200),
///     title: "Example Domain".to_string(),
///     stabilized: false,
/// };
/// assert_eq!(report.status_code, Some(200));
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WarmupReport {
    /// The URL that was warmed (the `url` passed in the options).
    pub url: String,
    /// Elapsed wall-time in milliseconds, including any stabilization pause.
    pub elapsed_ms: u64,
    /// HTTP status code of the warmup navigation, if captured by the
    /// `Network.responseReceived` listener.
    pub status_code: Option<u16>,
    /// Page title after warmup navigation; empty when the title could not be
    /// read.
    pub title: String,
    /// Whether a stabilization pause (`stabilize_ms > 0`) was applied after
    /// navigation.
    pub stabilized: bool,
}
1819
/// Options for [`PageHandle::refresh`].
///
/// When deserializing, every field is optional and falls back to its
/// documented default.
///
/// # Example
///
/// ```
/// use stygian_browser::page::{RefreshOptions, WarmupWait};
///
/// let opts = RefreshOptions {
///     wait: WarmupWait::DomContentLoaded,
///     timeout_ms: 15_000,
///     reset_connection: true,
/// };
/// assert!(opts.reset_connection);
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RefreshOptions {
    /// Wait strategy applied after the reload (default: `DomContentLoaded`).
    #[serde(default)]
    pub wait: WarmupWait,
    /// Reload timeout in milliseconds.  Default: `30 000`
    /// (see [`RefreshOptions::default_timeout_ms`]).
    #[serde(default = "RefreshOptions::default_timeout_ms")]
    pub timeout_ms: u64,
    /// Signals that the caller would like the refresh to use a new TCP
    /// connection while cookies and storage are retained in the browser
    /// process.  Default: `false`.
    ///
    /// NOTE(review): the `refresh` implementation in this file re-navigates
    /// to the current URL for both values and does not branch on this flag,
    /// so treat it as advisory; whether a new connection is opened is left
    /// to the browser.
    #[serde(default)]
    pub reset_connection: bool,
}
1849
1850impl RefreshOptions {
1851    /// Returns the default reload timeout (30 000 ms).
1852    #[must_use]
1853    pub const fn default_timeout_ms() -> u64 {
1854        30_000
1855    }
1856}
1857
1858impl Default for RefreshOptions {
1859    fn default() -> Self {
1860        Self {
1861            wait: WarmupWait::DomContentLoaded,
1862            timeout_ms: Self::default_timeout_ms(),
1863            reset_connection: false,
1864        }
1865    }
1866}
1867
/// Diagnostic report produced by [`PageHandle::refresh`].
///
/// # Example
///
/// ```
/// use stygian_browser::page::RefreshReport;
/// let report = RefreshReport {
///     url: "https://example.com".to_string(),
///     elapsed_ms: 180,
///     status_code: Some(200),
/// };
/// assert_eq!(report.elapsed_ms, 180);
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RefreshReport {
    /// URL of the page after the refresh navigation (re-read from the page
    /// once navigation has finished).
    pub url: String,
    /// Elapsed wall-time in milliseconds.
    pub elapsed_ms: u64,
    /// HTTP status code of the refresh navigation, if captured.
    pub status_code: Option<u16>,
}
1890
1891// ─── PageHandle warmup / refresh ──────────────────────────────────────────────
1892
1893impl PageHandle {
1894    /// Warm up a browser session by navigating to `options.url` and
1895    /// optionally waiting for dynamic resources to settle.
1896    ///
1897    /// Warmup is **idempotent**: calling it repeatedly re-navigates and
1898    /// re-warms the same session without adverse side effects.
1899    ///
1900    /// # Errors
1901    ///
1902    /// Returns [`BrowserError::NavigationFailed`] if the navigation times out
1903    /// or the underlying CDP call fails.
1904    ///
1905    /// # Example
1906    ///
1907    /// ```no_run
1908    /// # async fn run() -> stygian_browser::error::Result<()> {
1909    /// use stygian_browser::{BrowserPool, BrowserConfig};
1910    /// use stygian_browser::page::{WarmupOptions, WarmupWait};
1911    ///
1912    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1913    /// let handle = pool.acquire().await?;
1914    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1915    ///
1916    /// let report = page.warmup(WarmupOptions {
1917    ///     url: "https://example.com".to_string(),
1918    ///     wait: WarmupWait::DomContentLoaded,
1919    ///     timeout_ms: 30_000,
1920    ///     stabilize_ms: 500,
1921    /// }).await?;
1922    /// println!("warmed in {}ms: {}", report.elapsed_ms, report.title);
1923    /// handle.release().await;
1924    /// # Ok(())
1925    /// # }
1926    /// ```
1927    pub async fn warmup(&mut self, options: WarmupOptions) -> Result<WarmupReport> {
1928        let start = std::time::Instant::now();
1929        let nav_timeout = Duration::from_millis(options.timeout_ms);
1930        self.navigate(
1931            &options.url,
1932            options.wait.clone().into_wait_until(),
1933            nav_timeout,
1934        )
1935        .await?;
1936        let status_code = self.status_code()?;
1937        let title = self.title().await.unwrap_or_default();
1938        let stabilized = options.stabilize_ms > 0;
1939        if stabilized {
1940            tokio::time::sleep(Duration::from_millis(options.stabilize_ms)).await;
1941        }
1942        let elapsed_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
1943        Ok(WarmupReport {
1944            url: options.url,
1945            elapsed_ms,
1946            status_code,
1947            title,
1948            stabilized,
1949        })
1950    }
1951
1952    /// Refresh the current page, retaining all in-browser session state
1953    /// (cookies, `localStorage`, `sessionStorage`).
1954    ///
1955    /// When `options.reset_connection` is `false` (default) a standard
1956    /// CDP reload is issued.  When `true`, the current URL is re-navigated,
1957    /// which expresses the caller's intent to force a new underlying TCP/TLS
1958    /// connection while keeping all browser-side state intact.
1959    ///
1960    /// Refresh is **idempotent**: repeated calls simply reload the page again.
1961    ///
1962    /// # Errors
1963    ///
1964    /// Returns [`BrowserError::NavigationFailed`] if the current URL cannot be
1965    /// determined or the reload times out.
1966    ///
1967    /// # Example
1968    ///
1969    /// ```no_run
1970    /// # async fn run() -> stygian_browser::error::Result<()> {
1971    /// use stygian_browser::{BrowserPool, BrowserConfig};
1972    /// use stygian_browser::page::{RefreshOptions, WaitUntil};
1973    ///
1974    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1975    /// let handle = pool.acquire().await?;
1976    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1977    /// page.navigate(
1978    ///     "https://example.com",
1979    ///     WaitUntil::DomContentLoaded,
1980    ///     std::time::Duration::from_secs(30),
1981    /// ).await?;
1982    ///
1983    /// let report = page.refresh(RefreshOptions::default()).await?;
1984    /// println!("refreshed in {}ms", report.elapsed_ms);
1985    /// handle.release().await;
1986    /// # Ok(())
1987    /// # }
1988    /// ```
1989    pub async fn refresh(&mut self, options: RefreshOptions) -> Result<RefreshReport> {
1990        let start = std::time::Instant::now();
1991        let nav_timeout = Duration::from_millis(options.timeout_ms);
1992        let wait = options.wait.clone().into_wait_until();
1993        // Resolve the current URL before any navigation changes it.
1994        let current_url = self.url().await?;
1995        if current_url.is_empty() || current_url == "about:blank" {
1996            return Err(BrowserError::NavigationFailed {
1997                url: current_url,
1998                reason: "page has not been navigated yet; call warmup() or navigate() first"
1999                    .to_string(),
2000            });
2001        }
2002        // Both code paths navigate to the same URL.  `reset_connection: true`
2003        // expresses the *intent* to use a new TCP connection; the browser is free
2004        // to reuse or create a new connection as its connection pool dictates.
2005        self.navigate(&current_url, wait, nav_timeout).await?;
2006        let status_code = self.status_code()?;
2007        let url = self.url().await?;
2008        let elapsed_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
2009        Ok(RefreshReport {
2010            url,
2011            elapsed_ms,
2012            status_code,
2013        })
2014    }
2015}
2016
2017// ─── Tests ────────────────────────────────────────────────────────────────────
2018
2019#[cfg(test)]
2020mod tests {
2021    use super::*;
2022
2023    #[test]
2024    fn resource_filter_block_media_blocks_image() {
2025        let filter = ResourceFilter::block_media();
2026        assert!(filter.should_block("Image"));
2027        assert!(filter.should_block("Font"));
2028        assert!(filter.should_block("Stylesheet"));
2029        assert!(filter.should_block("Media"));
2030        assert!(!filter.should_block("Script"));
2031        assert!(!filter.should_block("XHR"));
2032    }
2033
2034    #[test]
2035    fn resource_filter_case_insensitive() {
2036        let filter = ResourceFilter::block_images_and_fonts();
2037        assert!(filter.should_block("image")); // lowercase
2038        assert!(filter.should_block("IMAGE")); // uppercase
2039        assert!(!filter.should_block("Stylesheet"));
2040    }
2041
2042    #[test]
2043    fn resource_filter_builder_chain() {
2044        let filter = ResourceFilter::default()
2045            .block(ResourceType::Image)
2046            .block(ResourceType::Font);
2047        assert!(filter.should_block("Image"));
2048        assert!(filter.should_block("Font"));
2049        assert!(!filter.should_block("Stylesheet"));
2050    }
2051
2052    #[test]
2053    fn resource_filter_dedup_block() {
2054        let filter = ResourceFilter::default()
2055            .block(ResourceType::Image)
2056            .block(ResourceType::Image); // duplicate
2057        assert_eq!(filter.blocked.len(), 1);
2058    }
2059
2060    #[test]
2061    fn resource_filter_is_empty_when_default() {
2062        assert!(ResourceFilter::default().is_empty());
2063        assert!(!ResourceFilter::block_media().is_empty());
2064    }
2065
2066    #[test]
2067    fn wait_until_selector_stores_string() {
2068        let w = WaitUntil::Selector("#foo".to_string());
2069        assert!(matches!(w, WaitUntil::Selector(ref s) if s == "#foo"));
2070    }
2071
2072    #[test]
2073    fn resource_type_cdp_str() {
2074        assert_eq!(ResourceType::Image.as_cdp_str(), "Image");
2075        assert_eq!(ResourceType::Font.as_cdp_str(), "Font");
2076        assert_eq!(ResourceType::Stylesheet.as_cdp_str(), "Stylesheet");
2077        assert_eq!(ResourceType::Media.as_cdp_str(), "Media");
2078    }
2079
2080    #[test]
2081    fn page_handle_is_send_sync() {
2082        fn assert_send<T: Send>() {}
2083        fn assert_sync<T: Sync>() {}
2084        assert_send::<PageHandle>();
2085        assert_sync::<PageHandle>();
2086    }
2087
2088    /// Verify the resilient extractor correctly classifies `ExtractionError`
2089    /// variants — `Missing` must be treated as "skip", others as hard errors.
2090    #[cfg(feature = "extract")]
2091    #[test]
2092    fn extraction_error_missing_is_skippable() {
2093        use crate::extract::ExtractionError;
2094
2095        let missing = ExtractionError::Missing {
2096            field: "title",
2097            selector: "h1",
2098        };
2099        assert!(
2100            matches!(missing, ExtractionError::Missing { .. }),
2101            "ExtractionError::Missing should be the skip variant"
2102        );
2103
2104        // Non-Missing variants should NOT match the skip pattern
2105        let nested = ExtractionError::Nested {
2106            field: "link",
2107            source: Box::new(ExtractionError::Missing {
2108                field: "href",
2109                selector: "a",
2110            }),
2111        };
2112        assert!(
2113            !matches!(nested, ExtractionError::Missing { .. }),
2114            "ExtractionError::Nested must not match Missing"
2115        );
2116    }
2117
2118    /// `Option<u16>` are pure-logic invariants testable without a live browser.
2119    #[test]
2120    fn status_code_sentinel_zero_maps_to_none() {
2121        use std::sync::atomic::{AtomicU16, Ordering};
2122        let atom = AtomicU16::new(0);
2123        let code = atom.load(Ordering::Acquire);
2124        assert_eq!(if code == 0 { None } else { Some(code) }, None::<u16>);
2125    }
2126
2127    #[test]
2128    fn status_code_non_zero_maps_to_some() {
2129        use std::sync::atomic::{AtomicU16, Ordering};
2130        for &expected in &[200u16, 301, 404, 503] {
2131            let atom = AtomicU16::new(expected);
2132            let code = atom.load(Ordering::Acquire);
2133            assert_eq!(if code == 0 { None } else { Some(code) }, Some(expected));
2134        }
2135    }
2136
2137    // ── NodeHandle pure-logic tests ───────────────────────────────────────────
2138
2139    /// `attr_map` relies on `chunks_exact(2)` — verify the pairing logic is
2140    /// correct without a live browser by exercising it directly.
2141    #[test]
2142    fn attr_map_chunking_pairs_correctly() {
2143        let flat = [
2144            "id".to_string(),
2145            "main".to_string(),
2146            "data-ux".to_string(),
2147            "Section".to_string(),
2148            "class".to_string(),
2149            "container".to_string(),
2150        ];
2151        let mut map = std::collections::HashMap::with_capacity(flat.len() / 2);
2152        for pair in flat.chunks_exact(2) {
2153            if let [name, value] = pair {
2154                map.insert(name.clone(), value.clone());
2155            }
2156        }
2157        assert_eq!(map.get("id").map(String::as_str), Some("main"));
2158        assert_eq!(map.get("data-ux").map(String::as_str), Some("Section"));
2159        assert_eq!(map.get("class").map(String::as_str), Some("container"));
2160        assert_eq!(map.len(), 3);
2161    }
2162
2163    /// gracefully — the trailing element is silently ignored.
2164    #[test]
2165    fn attr_map_chunking_ignores_odd_trailing() {
2166        let flat = ["orphan".to_string()]; // no value
2167        let mut map = std::collections::HashMap::new();
2168        for pair in flat.chunks_exact(2) {
2169            if let [name, value] = pair {
2170                map.insert(name.clone(), value.clone());
2171            }
2172        }
2173        assert!(map.is_empty());
2174    }
2175
2176    /// Empty flat list → empty map.
2177    #[test]
2178    fn attr_map_chunking_empty_input() {
2179        let flat: Vec<String> = vec![];
2180        let map: std::collections::HashMap<String, String> = flat
2181            .chunks_exact(2)
2182            .filter_map(|pair| {
2183                if let [name, value] = pair {
2184                    Some((name.clone(), value.clone()))
2185                } else {
2186                    None
2187                }
2188            })
2189            .collect();
2190        assert!(map.is_empty());
2191    }
2192
2193    #[test]
2194    fn ancestors_json_parse_round_trip() -> std::result::Result<(), serde_json::Error> {
2195        let json = r#"["p","article","body","html"]"#;
2196        let result: Vec<String> = serde_json::from_str(json)?;
2197        assert_eq!(result, ["p", "article", "body", "html"]);
2198        Ok(())
2199    }
2200
2201    #[test]
2202    fn ancestors_json_parse_empty() -> std::result::Result<(), serde_json::Error> {
2203        let json = "[]";
2204        let result: Vec<String> = serde_json::from_str(json)?;
2205        assert!(result.is_empty());
2206        Ok(())
2207    }
2208
2209    /// `"div::parent"`) must surface that suffix in its `Display` output so
2210    /// callers can locate the failed traversal in logs.
2211    #[test]
2212    fn traversal_selector_suffix_in_stale_error() {
2213        let e = crate::error::BrowserError::StaleNode {
2214            selector: "div::parent".to_string(),
2215        };
2216        let msg = e.to_string();
2217        assert!(
2218            msg.contains("div::parent"),
2219            "StaleNode display must include the full selector; got: {msg}"
2220        );
2221    }
2222
2223    #[test]
2224    fn traversal_next_suffix_in_stale_error() {
2225        let e = crate::error::BrowserError::StaleNode {
2226            selector: "li.price::next".to_string(),
2227        };
2228        assert!(e.to_string().contains("li.price::next"));
2229    }
2230
2231    #[test]
2232    fn traversal_prev_suffix_in_stale_error() {
2233        let e = crate::error::BrowserError::StaleNode {
2234            selector: "td.label::prev".to_string(),
2235        };
2236        assert!(e.to_string().contains("td.label::prev"));
2237    }
2238
2239    // ── Warmup / Refresh type tests ───────────────────────────────────────────
2240
2241    #[test]
2242    fn warmup_options_defaults() {
2243        let opts = WarmupOptions::default();
2244        assert_eq!(opts.wait, WarmupWait::DomContentLoaded);
2245        assert_eq!(opts.timeout_ms, WarmupOptions::default_timeout_ms());
2246        assert_eq!(opts.stabilize_ms, 0);
2247    }
2248
2249    #[test]
2250    fn warmup_options_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>>
2251    {
2252        let opts = WarmupOptions {
2253            url: "https://example.com".to_string(),
2254            wait: WarmupWait::NetworkIdle,
2255            timeout_ms: 15_000,
2256            stabilize_ms: 250,
2257        };
2258        let json = serde_json::to_string(&opts)?;
2259        let restored: WarmupOptions = serde_json::from_str(&json)?;
2260        assert_eq!(restored.url, "https://example.com");
2261        assert_eq!(restored.wait, WarmupWait::NetworkIdle);
2262        assert_eq!(restored.timeout_ms, 15_000);
2263        assert_eq!(restored.stabilize_ms, 250);
2264        Ok(())
2265    }
2266
2267    #[test]
2268    fn warmup_wait_default_is_dom_content_loaded() {
2269        assert_eq!(WarmupWait::default(), WarmupWait::DomContentLoaded);
2270    }
2271
2272    #[test]
2273    fn warmup_wait_into_wait_until_variants() {
2274        assert!(matches!(
2275            WarmupWait::DomContentLoaded.into_wait_until(),
2276            WaitUntil::DomContentLoaded
2277        ));
2278        assert!(matches!(
2279            WarmupWait::NetworkIdle.into_wait_until(),
2280            WaitUntil::NetworkIdle
2281        ));
2282    }
2283
2284    #[test]
2285    fn refresh_options_defaults() {
2286        let opts = RefreshOptions::default();
2287        assert_eq!(opts.wait, WarmupWait::DomContentLoaded);
2288        assert_eq!(opts.timeout_ms, RefreshOptions::default_timeout_ms());
2289        assert!(!opts.reset_connection);
2290    }
2291
2292    #[test]
2293    fn refresh_options_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>>
2294    {
2295        let opts = RefreshOptions {
2296            wait: WarmupWait::NetworkIdle,
2297            timeout_ms: 10_000,
2298            reset_connection: true,
2299        };
2300        let json = serde_json::to_string(&opts)?;
2301        let restored: RefreshOptions = serde_json::from_str(&json)?;
2302        assert_eq!(restored.wait, WarmupWait::NetworkIdle);
2303        assert_eq!(restored.timeout_ms, 10_000);
2304        assert!(restored.reset_connection);
2305        Ok(())
2306    }
2307
2308    #[test]
2309    fn warmup_report_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>> {
2310        let report = WarmupReport {
2311            url: "https://example.com".to_string(),
2312            elapsed_ms: 320,
2313            status_code: Some(200),
2314            title: "Example Domain".to_string(),
2315            stabilized: true,
2316        };
2317        let json = serde_json::to_string(&report)?;
2318        let restored: WarmupReport = serde_json::from_str(&json)?;
2319        assert_eq!(restored.url, "https://example.com");
2320        assert_eq!(restored.elapsed_ms, 320);
2321        assert_eq!(restored.status_code, Some(200));
2322        assert_eq!(restored.title, "Example Domain");
2323        assert!(restored.stabilized);
2324        Ok(())
2325    }
2326
2327    #[test]
2328    fn refresh_report_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>>
2329    {
2330        let report = RefreshReport {
2331            url: "https://example.com/".to_string(),
2332            elapsed_ms: 180,
2333            status_code: Some(304),
2334        };
2335        let json = serde_json::to_string(&report)?;
2336        let restored: RefreshReport = serde_json::from_str(&json)?;
2337        assert_eq!(restored.url, "https://example.com/");
2338        assert_eq!(restored.elapsed_ms, 180);
2339        assert_eq!(restored.status_code, Some(304));
2340        Ok(())
2341    }
2342
2343    #[test]
2344    fn warmup_options_missing_stabilize_ms_defaults_to_zero()
2345    -> std::result::Result<(), Box<dyn std::error::Error>> {
2346        // stabilize_ms has `#[serde(default)]`; omitting it from JSON should
2347        // deserialize to 0 rather than erroring.
2348        let json = r#"{"url":"https://example.com","timeout_ms":30000}"#;
2349        let opts: WarmupOptions = serde_json::from_str(json)?;
2350        assert_eq!(opts.stabilize_ms, 0);
2351        Ok(())
2352    }
2353
2354    // ── Integration tests (require live Chrome — skipped in CI) ──────────────
2355
2356    /// Warm up a page then immediately extract content from the same origin.
2357    #[test]
2358    #[ignore = "requires live Chrome"]
2359    #[allow(clippy::expect_used)]
2360    fn integration_warmup_then_extraction() {
2361        let rt = tokio::runtime::Runtime::new().expect("tokio runtime");
2362        rt.block_on(async {
2363            use crate::{BrowserConfig, BrowserPool};
2364            let pool = BrowserPool::new(BrowserConfig::default())
2365                .await
2366                .expect("pool");
2367            let handle = pool.acquire().await.expect("handle");
2368            let mut page = handle
2369                .browser()
2370                .expect("browser")
2371                .new_page()
2372                .await
2373                .expect("page");
2374
2375            let report = page
2376                .warmup(WarmupOptions {
2377                    url: "https://example.com".to_string(),
2378                    wait: WarmupWait::DomContentLoaded,
2379                    timeout_ms: 30_000,
2380                    stabilize_ms: 0,
2381                })
2382                .await
2383                .expect("warmup");
2384
2385            assert!(!report.title.is_empty(), "title populated after warmup");
2386            assert!(report.elapsed_ms > 0);
2387
2388            // Confirm the page is still usable for further queries.
2389            let html = page.content().await.expect("content");
2390            assert!(
2391                html.contains("example"),
2392                "page content available after warmup"
2393            );
2394
2395            page.close().await.expect("close");
2396            handle.release().await;
2397        });
2398    }
2399
2400    /// Refresh a page and verify session continuity (URL unchanged, page
2401    /// still navigable).
2402    #[test]
2403    #[ignore = "requires live Chrome"]
2404    #[allow(clippy::expect_used)]
2405    fn integration_refresh_keeps_session_state() {
2406        let rt = tokio::runtime::Runtime::new().expect("tokio runtime");
2407        rt.block_on(async {
2408            use crate::{BrowserConfig, BrowserPool};
2409            let pool = BrowserPool::new(BrowserConfig::default())
2410                .await
2411                .expect("pool");
2412            let handle = pool.acquire().await.expect("handle");
2413            let mut page = handle
2414                .browser()
2415                .expect("browser")
2416                .new_page()
2417                .await
2418                .expect("page");
2419
2420            page.navigate(
2421                "https://example.com",
2422                WaitUntil::DomContentLoaded,
2423                Duration::from_secs(30),
2424            )
2425            .await
2426            .expect("initial navigate");
2427
2428            let report = page
2429                .refresh(RefreshOptions::default())
2430                .await
2431                .expect("refresh");
2432
2433            assert!(
2434                report.url.contains("example.com"),
2435                "URL retained after refresh; got: {}",
2436                report.url
2437            );
2438            assert!(report.elapsed_ms > 0);
2439
2440            page.close().await.expect("close");
2441            handle.release().await;
2442        });
2443    }
2444}