Skip to main content

stygian_browser/
page.rs

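//! Page-level helpers: resource filtering, navigation wait conditions, and
//! DOM node handles.
//!
//! ## Resource blocking
//!
//! A [`ResourceFilter`] lists the CDP resource types (images, fonts,
//! stylesheets, media) that should be blocked; install it on a page with
//! `set_resource_filter`, as shown in the example below.
//!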
4//! ## Wait strategies
5//!
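//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
//! - `DomContentLoaded` — fires when the HTML is parsed
//! - `NetworkIdle` — waits for the network to go idle
//! - `Selector(String)` — parameterised by a selector string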
8//!
9//! # Example
10//!
11//! ```no_run
12//! use stygian_browser::{BrowserPool, BrowserConfig};
13//! use stygian_browser::page::{ResourceFilter, WaitUntil};
14//! use std::time::Duration;
15//!
16//! # async fn run() -> stygian_browser::error::Result<()> {
17//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
18//! let handle = pool.acquire().await?;
19//!
20//! let mut page = handle.browser().expect("valid browser").new_page().await?;
21//! page.set_resource_filter(ResourceFilter::block_media()).await?;
22//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
23//! let title = page.title().await?;
24//! println!("title: {title}");
25//! handle.release().await;
26//! # Ok(())
27//! # }
28//! ```
29
30use std::collections::HashMap;
31use std::sync::{
32    Arc,
33    atomic::{AtomicU16, Ordering},
34};
35use std::time::Duration;
36
37use chromiumoxide::Page;
38use serde::{Deserialize, Serialize};
39use tokio::time::timeout;
40use tracing::{debug, warn};
41
42use crate::error::{BrowserError, Result};
43
44// ─── ResourceType ─────────────────────────────────────────────────────────────
45
/// Kinds of sub-resource, named after their CDP resource types, that can be
/// intercepted.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ResourceType {
    /// Images: `<img>`, `<picture>`, and CSS background images.
    Image,
    /// Web fonts loaded through CSS `@font-face` rules.
    Font,
    /// External CSS stylesheets.
    Stylesheet,
    /// Audio and video media files.
    Media,
}
58
59impl ResourceType {
60    #[must_use]
61    pub const fn as_cdp_str(&self) -> &'static str {
62        match self {
63            Self::Image => "Image",
64            Self::Font => "Font",
65            Self::Stylesheet => "Stylesheet",
66            Self::Media => "Media",
67        }
68    }
69}
70
71// ─── ResourceFilter ───────────────────────────────────────────────────────────
72
73///
74/// # Example
75///
76/// ```
77/// use stygian_browser::page::ResourceFilter;
78/// let filter = ResourceFilter::block_media();
79/// assert!(filter.should_block("Image"));
80/// ```
81#[derive(Debug, Clone, Default)]
82pub struct ResourceFilter {
83    blocked: Vec<ResourceType>,
84}
85
86impl ResourceFilter {
87    /// Block all media resources (images, fonts, CSS, audio/video).
88    #[must_use]
89    pub fn block_media() -> Self {
90        Self {
91            blocked: vec![
92                ResourceType::Image,
93                ResourceType::Font,
94                ResourceType::Stylesheet,
95                ResourceType::Media,
96            ],
97        }
98    }
99
100    #[must_use]
101    pub fn block_images_and_fonts() -> Self {
102        Self {
103            blocked: vec![ResourceType::Image, ResourceType::Font],
104        }
105    }
106
107    #[must_use]
108    pub fn block(mut self, resource: ResourceType) -> Self {
109        if !self.blocked.contains(&resource) {
110            self.blocked.push(resource);
111        }
112        self
113    }
114
115    #[must_use]
116    pub fn should_block(&self, cdp_type: &str) -> bool {
117        self.blocked
118            .iter()
119            .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
120    }
121
122    #[must_use]
123    pub const fn is_empty(&self) -> bool {
124        self.blocked.is_empty()
125    }
126}
127
128// ─── WaitUntil ────────────────────────────────────────────────────────────────
129
/// Specifies what condition to wait for after a page navigation.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
/// let strategy = WaitUntil::Selector("#content".to_string());
/// assert!(matches!(strategy, WaitUntil::Selector(_)));
/// ```
#[derive(Clone, Debug)]
pub enum WaitUntil {
    /// Fires when the initial HTML is fully parsed, without waiting for
    /// subresources such as images and stylesheets to finish loading.
    DomContentLoaded,
    /// Network-idle condition.
    // NOTE(review): the exact idle criteria are implemented outside this
    // definition — confirm them before documenting details here.
    NetworkIdle,
    /// Condition parameterised by the contained selector string.
    // NOTE(review): presumably a CSS selector that must match an element —
    // confirm against the navigation code.
    Selector(String),
}
145
146// ─── OuterHtmlStrategy / OuterHtmlResult ──────────────────────────────────────
147
148/// Selector for [`NodeHandle::outer_html_with_strategy`].
149///
150/// The default [`OuterHtmlStrategy::Current`] preserves the historical call
151/// path used by [`NodeHandle::outer_html`]: a Chromium element-level
152/// `outer_html()` call (which evaluates `this.outerHTML` via JS) followed
153/// by a direct `XMLSerializer` fallback when the primary call returns an
154/// empty payload.
155///
156/// [`OuterHtmlStrategy::Recursive`] uses the dedicated Chromium `DevTools`
157/// Protocol command `DOM.getOuterHTML` (a single round-trip, browser-side
158/// serialisation that already includes shadow-DOM roots) with a Rust-side
159/// fallback that calls `DOM.describeNode` with `depth = -1` and walks the
160/// resulting CDP `Node` tree to produce HTML locally.
161///
162/// Both strategies are **generic** — neither relies on Wix, SPA, or vendor
163/// attributes, classes, or heuristics. `Recursive` simply selects a different
164/// CDP backend that already handles deeply nested subtrees, large SPAs, and
165/// shadow-DOM trees correctly in a single browser-side pass.
166///
167/// # Example
168///
169/// ```
170/// use stygian_browser::page::OuterHtmlStrategy;
171/// assert_eq!(OuterHtmlStrategy::default(), OuterHtmlStrategy::Current);
172/// assert_eq!(OuterHtmlStrategy::Current.as_str(), "Current");
173/// assert_eq!(OuterHtmlStrategy::Recursive.as_str(), "Recursive");
174/// ```
175#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
176pub enum OuterHtmlStrategy {
177    /// Legacy behaviour: element-level JS eval + `XMLSerializer` fallback.
178    #[default]
179    Current,
180    /// CDP `DOM.getOuterHTML` (single round-trip) + Rust-side
181    /// `DOM.describeNode` walk fallback.
182    Recursive,
183}
184
185impl OuterHtmlStrategy {
186    /// Stable identifier suitable for logs, metrics, and serialization.
187    #[must_use]
188    pub const fn as_str(&self) -> &'static str {
189        match self {
190            Self::Current => "Current",
191            Self::Recursive => "Recursive",
192        }
193    }
194
195    /// All known variants in declaration order. Useful for exhaustive
196    /// iteration in tests and diagnostics.
197    #[must_use]
198    pub const fn all() -> [Self; 2] {
199        [Self::Current, Self::Recursive]
200    }
201}
202
203impl std::fmt::Display for OuterHtmlStrategy {
204    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
205        f.write_str(self.as_str())
206    }
207}
208
209/// Outcome of [`NodeHandle::outer_html_with_strategy`].
210///
211/// The default `String`-returning [`NodeHandle::outer_html`] flattens this
212/// into a `Result<String>` where `Empty` and `Failed` both surface as the
213/// empty string — preserving the historical contract.
214///
215/// Derives [`Serialize`] so callers can include the outcome in structured
216/// logs, metrics, or per-request reports. `Deserialize` is intentionally not
217/// derived because the `Failed::backends` field holds `&'static str`
218/// backend names — a deserialised value would need owned `String`s and
219/// would lose the typed backend taxonomy this enum encodes.
220#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
221pub enum OuterHtmlResult {
222    /// The chosen strategy's backends all returned an empty payload. This
223    /// typically means the page is still rendering or the node has been
224    /// detached since the handle was created.
225    Empty,
226    /// Successfully serialised outer markup for the target node.
227    Content(String),
228    /// Every backend the strategy tried returned an error. The list names
229    /// the backends in the order they were attempted so callers can build
230    /// retry strategies or surface diagnostics.
231    Failed {
232        /// Names of the backends that returned an error.
233        backends: Vec<&'static str>,
234    },
235}
236
237impl OuterHtmlResult {
238    /// Return the serialized markup, or `None` if the result is `Empty` or
239    /// `Failed`.
240    #[must_use]
241    pub const fn content(&self) -> Option<&str> {
242        match self {
243            Self::Content(s) => Some(s.as_str()),
244            Self::Empty | Self::Failed { .. } => None,
245        }
246    }
247
248    /// `true` when the result carries no usable markup — either `Empty` or
249    /// `Failed`.
250    #[must_use]
251    pub const fn is_empty(&self) -> bool {
252        match self {
253            Self::Content(s) => s.is_empty(),
254            Self::Empty | Self::Failed { .. } => true,
255        }
256    }
257}
258
259impl std::fmt::Display for OuterHtmlResult {
260    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
261        match self {
262            Self::Empty => f.write_str("Empty"),
263            Self::Content(s) => write!(f, "Content({} bytes)", s.len()),
264            Self::Failed { backends } => write!(f, "Failed({})", backends.join(", ")),
265        }
266    }
267}
268
269// ─── NodeHandle ───────────────────────────────────────────────────────────────
270
271///
272/// more CDP `Runtime.callFunctionOn` calls against the held V8 remote object
273/// reference — no HTML serialisation occurs.
274///
275/// A handle becomes **stale** after page navigation or if the underlying DOM
276/// node is removed.  Stale calls return [`BrowserError::StaleNode`] so callers
277/// can distinguish them from other CDP failures.
278///
279/// # Example
280///
281/// ```no_run
282/// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
283/// use std::time::Duration;
284///
285/// # async fn run() -> stygian_browser::error::Result<()> {
286/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
287/// let handle = pool.acquire().await?;
288/// let mut page = handle.browser().expect("valid browser").new_page().await?;
289/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
290/// # let nodes = page.query_selector_all("a").await?;
291/// # for node in &nodes {
292///     let href = node.attr("href").await?;
293///     let text = node.text_content().await?;
294///     println!("{text}: {href:?}");
295/// # }
296/// # Ok(())
297/// # }
298/// ```
299pub struct NodeHandle {
300    element: chromiumoxide::element::Element,
301    /// Shared via `Arc<str>` so all handles from a single query reuse the
302    /// same allocation rather than cloning a `String` per node.
303    selector: Arc<str>,
304    cdp_timeout: Duration,
305    /// during DOM traversal (parent / sibling navigation).
306    page: chromiumoxide::Page,
307}
308
309impl NodeHandle {
310    /// Return a single attribute value, or `None` if the attribute is absent.
311    ///
312    /// Issues one `Runtime.callFunctionOn` CDP call (`el.getAttribute(name)`).
313    ///
314    /// # Errors
315    ///
316    /// invalidated, or [`BrowserError::Timeout`] / [`BrowserError::CdpError`]
317    /// on transport-level failures.
318    pub async fn attr(&self, name: &str) -> Result<Option<String>> {
319        timeout(self.cdp_timeout, self.element.attribute(name))
320            .await
321            .map_err(|_| BrowserError::Timeout {
322                operation: "NodeHandle::attr".to_string(),
323                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
324            })?
325            .map_err(|e| self.cdp_err_or_stale(&e, "attr"))
326    }
327
328    /// Return all attributes as a `HashMap<name, value>` in a **single**
329    /// CDP round-trip.
330    ///
331    /// Uses `DOM.getAttributes` (via the chromiumoxide `attributes()` API)
332    /// which returns a flat `[name, value, name, value, …]` list from the node
333    /// description — no per-attribute calls are needed.
334    ///
335    /// # Errors
336    ///
337    /// invalidated.
338    pub async fn attr_map(&self) -> Result<HashMap<String, String>> {
339        let flat = timeout(self.cdp_timeout, self.element.attributes())
340            .await
341            .map_err(|_| BrowserError::Timeout {
342                operation: "NodeHandle::attr_map".to_string(),
343                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
344            })?
345            .map_err(|e| self.cdp_err_or_stale(&e, "attr_map"))?;
346
347        let mut map = HashMap::with_capacity(flat.len() / 2);
348        for pair in flat.chunks_exact(2) {
349            if let [name, value] = pair {
350                map.insert(name.clone(), value.clone());
351            }
352        }
353        Ok(map)
354    }
355
356    /// Return the element's `textContent` (all text inside, no markup).
357    ///
358    /// Reads the DOM `textContent` property via a single JS eval — this is the
359    /// raw text concatenation of all descendant text nodes, independent of
360    /// layout or visibility (unlike `innerText`).
361    ///
362    ///
363    /// # Errors
364    ///
365    /// invalidated.
366    pub async fn text_content(&self) -> Result<String> {
367        let returns = timeout(
368            self.cdp_timeout,
369            self.element
370                .call_js_fn(r"function() { return this.textContent ?? ''; }", true),
371        )
372        .await
373        .map_err(|_| BrowserError::Timeout {
374            operation: "NodeHandle::text_content".to_string(),
375            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
376        })?
377        .map_err(|e| self.cdp_err_or_stale(&e, "text_content"))?;
378
379        Ok(returns
380            .result
381            .value
382            .as_ref()
383            .and_then(|v| v.as_str())
384            .unwrap_or("")
385            .to_string())
386    }
387
388    /// Return the element's `innerHTML`.
389    ///
390    ///
391    /// # Errors
392    ///
393    /// invalidated.
394    pub async fn inner_html(&self) -> Result<String> {
395        timeout(self.cdp_timeout, self.element.inner_html())
396            .await
397            .map_err(|_| BrowserError::Timeout {
398                operation: "NodeHandle::inner_html".to_string(),
399                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
400            })?
401            .map_err(|e| self.cdp_err_or_stale(&e, "inner_html"))
402            .map(Option::unwrap_or_default)
403    }
404
405    /// Return the element's `outerHTML`.
406    ///
407    /// Backwards-compatible thin wrapper around
408    /// [`outer_html_with_strategy`][Self::outer_html_with_strategy] using the
409    /// default [`OuterHtmlStrategy::Current`] strategy. Preserves the
410    /// historical return contract: `Ok(String)` where the string may be
411    /// empty when both the primary and fallback backends return empty
412    /// payloads.
413    ///
414    /// Callers that need to distinguish an empty payload from a hard failure
415    /// — or that want the deeper `DOM.getOuterHTML` + Rust-side walk path —
416    /// should call [`outer_html_with_strategy`][Self::outer_html_with_strategy]
417    /// directly.
418    ///
419    /// # Errors
420    ///
421    /// Returns an error when any CDP call the chosen strategy actually
422    /// invokes fails — that includes both the primary call and any fallback
423    /// call (the `XMLSerializer` JS fallback for [`OuterHtmlStrategy::Current`],
424    /// the `DOM.describeNode` walk for [`OuterHtmlStrategy::Recursive`]).
425    /// Errors surface as [`BrowserError::Timeout`] (CDP call exceeded
426    /// `cdp_timeout`), [`BrowserError::StaleNode`] (the handle was
427    /// invalidated mid-call), or [`BrowserError::CdpError`] (transport-level
428    /// failure).
429    ///
430    /// Empty or partially-empty payloads from any individual backend do
431    /// **not** error — they are flattened to an empty `String` so the
432    /// historical `Ok(String)` contract is preserved. Callers that need to
433    /// distinguish an empty payload from a hard failure should call
434    /// [`outer_html_with_strategy`][Self::outer_html_with_strategy]
435    /// directly and inspect the [`OuterHtmlResult`] variant.
436    pub async fn outer_html(&self) -> Result<String> {
437        match self
438            .outer_html_with_strategy(OuterHtmlStrategy::Current)
439            .await?
440        {
441            OuterHtmlResult::Content(s) => Ok(s),
442            OuterHtmlResult::Empty | OuterHtmlResult::Failed { .. } => Ok(String::new()),
443        }
444    }
445
446    /// Return the element's `outerHTML` using an explicit resolution strategy.
447    ///
448    /// The [`OuterHtmlStrategy::Current`] strategy matches the historical
449    /// [`outer_html`][Self::outer_html] path: a Chromium element-level JS
450    /// evaluation of `this.outerHTML`, followed by a JS
451    /// `new XMLSerializer().serializeToString(this)` fallback when the
452    /// primary call returns an empty payload.
453    ///
454    /// The [`OuterHtmlStrategy::Recursive`] strategy resolves [#66] for
455    /// sites where the JS-side `outerHTML` accessor intermittently returns
456    /// a truncated or empty payload — most notably Wix Studio / Editor X
457    /// pages and large SPAs with deeply nested shadow-DOM subtrees. It
458    /// prefers the dedicated Chromium `DevTools` Protocol command
459    /// `DOM.getOuterHTML` (a single round-trip that performs the
460    /// serialisation inside the browser, with shadow-DOM roots included by
461    /// default) and falls back to a Rust-side walk that calls
462    /// `DOM.describeNode` with `depth = -1` and serialises the resulting
463    /// `Node` tree to HTML locally. Neither path relies on Wix-specific
464    /// selectors, attributes, or heuristics — the resolution is entirely
465    /// driven by CDP commands Chromium already exposes.
466    ///
467    /// Both strategies return [`OuterHtmlResult::Empty`] (rather than
468    /// `Failed`) when every backend returns an empty payload — this is
469    /// indistinguishable from "node legitimately empty" at the CDP layer.
470    ///
471    /// [#66]: https://github.com/greysquirr3l/stygian/issues/66
472    ///
473    /// # Errors
474    ///
475    /// Returns [`BrowserError::Timeout`] if the primary CDP call exceeds
476    /// `cdp_timeout`, [`BrowserError::StaleNode`] if the handle was
477    /// invalidated, or [`BrowserError::CdpError`] on transport-level
478    /// failure.
479    ///
480    /// # Example
481    ///
482    /// ```no_run
483    /// use stygian_browser::page::OuterHtmlStrategy;
484    /// # use stygian_browser::error::Result;
485    /// # async fn run(handle: stygian_browser::NodeHandle) -> Result<()> {
486    /// // Use the deep-resolution path for SPA / Wix Studio / shadow-DOM pages.
487    /// let html = handle
488    ///     .outer_html_with_strategy(OuterHtmlStrategy::Recursive)
489    ///     .await?;
490    /// # let _ = html;
491    /// # Ok(())
492    /// # }
493    /// ```
494    pub async fn outer_html_with_strategy(
495        &self,
496        strategy: OuterHtmlStrategy,
497    ) -> Result<OuterHtmlResult> {
498        match strategy {
499            OuterHtmlStrategy::Current => self.outer_html_current().await,
500            OuterHtmlStrategy::Recursive => self.outer_html_recursive().await,
501        }
502    }
503
504    /// Strategy body for [`OuterHtmlStrategy::Current`].
505    async fn outer_html_current(&self) -> Result<OuterHtmlResult> {
506        let primary = timeout(self.cdp_timeout, self.element.outer_html())
507            .await
508            .map_err(|_| BrowserError::Timeout {
509                operation: "NodeHandle::outer_html_with_strategy(Current)".to_string(),
510                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
511            })?
512            .map_err(|e| self.cdp_err_or_stale(&e, "outer_html_current"))?;
513
514        if let Some(html) = primary
515            && !html.trim().is_empty()
516        {
517            return Ok(OuterHtmlResult::Content(html));
518        }
519
520        let fallback_html = self.outer_html_via_js().await?;
521        if !fallback_html.trim().is_empty() {
522            return Ok(OuterHtmlResult::Content(fallback_html));
523        }
524
525        Ok(OuterHtmlResult::Empty)
526    }
527
528    /// Strategy body for [`OuterHtmlStrategy::Recursive`].
529    ///
530    /// Primary: `DOM.getOuterHTML` (single round-trip, browser-side
531    /// serialisation). Fallback: `DOM.describeNode(nodeId, depth=-1)` +
532    /// Rust-side `Node` → HTML serializer.
533    async fn outer_html_recursive(&self) -> Result<OuterHtmlResult> {
534        use chromiumoxide::cdp::browser_protocol::dom::{GetOuterHtmlParams, GetOuterHtmlReturns};
535        use chromiumoxide::types::CommandResponse;
536
537        let mut failed_backends: Vec<&'static str> = Vec::new();
538
539        let primary = timeout(
540            self.cdp_timeout,
541            self.page.execute(
542                GetOuterHtmlParams::builder()
543                    .node_id(self.element.node_id)
544                    .build(),
545            ),
546        )
547        .await
548        .map_err(|_| BrowserError::Timeout {
549            operation: "NodeHandle::outer_html_with_strategy(Recursive)".to_string(),
550            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
551        })?
552        .map_err(|e| self.cdp_err_or_stale(&e, "outer_html_recursive::DOM.getOuterHTML"));
553
554        match primary {
555            Ok(CommandResponse {
556                result: GetOuterHtmlReturns { outer_html },
557                ..
558            }) if !outer_html.trim().is_empty() => {
559                return Ok(OuterHtmlResult::Content(outer_html));
560            }
561            Ok(CommandResponse {
562                result: GetOuterHtmlReturns { outer_html },
563                ..
564            }) => {
565                debug!(
566                    selector = %self.selector,
567                    bytes = outer_html.len(),
568                    "DOM.getOuterHTML returned empty payload; falling back to DOM.describeNode walk"
569                );
570            }
571            Err(e) => {
572                failed_backends.push("DOM.getOuterHTML");
573                debug!(
574                    selector = %self.selector,
575                    error = %e,
576                    "DOM.getOuterHTML failed; falling back to DOM.describeNode walk"
577                );
578            }
579        }
580
581        match self.outer_html_via_rust_walk().await {
582            Ok(html) if !html.trim().is_empty() => Ok(OuterHtmlResult::Content(html)),
583            Ok(_) => {
584                if failed_backends.is_empty() {
585                    // Every backend returned an empty payload (no errors
586                    // raised). Surface this as `Empty` rather than `Failed`.
587                    Ok(OuterHtmlResult::Empty)
588                } else {
589                    // At least one backend errored and the other returned
590                    // empty — surface as `Failed` so callers can
591                    // distinguish "nothing to serialize" from "backends
592                    // broke".
593                    Ok(OuterHtmlResult::Failed {
594                        backends: failed_backends,
595                    })
596                }
597            }
598            Err(e) => {
599                failed_backends.push("DOM.describeNode-walk");
600                debug!(
601                    selector = %self.selector,
602                    error = %e,
603                    "Rust-side DOM.describeNode walk failed"
604                );
605                Ok(OuterHtmlResult::Failed {
606                    backends: failed_backends,
607                })
608            }
609        }
610    }
611
612    /// Rust-side fallback: `DOM.describeNode` with `depth = -1` returns the
613    /// entire subtree rooted at the target node; we walk it locally and emit
614    /// HTML using [`serialize_node_tree`].
615    async fn outer_html_via_rust_walk(&self) -> Result<String> {
616        use chromiumoxide::cdp::browser_protocol::dom::DescribeNodeParams;
617        use chromiumoxide::types::CommandResponse;
618
619        let described: CommandResponse<
620            chromiumoxide::cdp::browser_protocol::dom::DescribeNodeReturns,
621        > = timeout(
622            self.cdp_timeout,
623            self.page.execute(
624                DescribeNodeParams::builder()
625                    .node_id(self.element.node_id)
626                    .depth(-1)
627                    .build(),
628            ),
629        )
630        .await
631        .map_err(|_| BrowserError::Timeout {
632            operation: "NodeHandle::outer_html_via_rust_walk".to_string(),
633            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
634        })?
635        .map_err(|e| self.cdp_err_or_stale(&e, "outer_html_via_rust_walk"))?;
636
637        Ok(serialize_node_tree(&described.node))
638    }
639
640    async fn outer_html_via_js(&self) -> Result<String> {
641        let returns = timeout(
642            self.cdp_timeout,
643            self.element.call_js_fn(
644                r"function() {
645                    if (typeof this.outerHTML === 'string' && this.outerHTML.length > 0) {
646                        return this.outerHTML;
647                    }
648                    try {
649                        return new XMLSerializer().serializeToString(this);
650                    } catch (_) {
651                        return '';
652                    }
653                }",
654                true,
655            ),
656        )
657        .await
658        .map_err(|_| BrowserError::Timeout {
659            operation: "NodeHandle::outer_html_via_js".to_string(),
660            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
661        })?
662        .map_err(|e| self.cdp_err_or_stale(&e, "outer_html_via_js"))?;
663
664        Ok(returns
665            .result
666            .value
667            .as_ref()
668            .and_then(serde_json::Value::as_str)
669            .unwrap_or_default()
670            .to_string())
671    }
672
673    ///
674    /// Executes a single `Runtime.callFunctionOn` JavaScript function that
675    /// walks `parentElement` and collects tag names — no repeated CDP calls.
676    ///
677    /// ```text
678    /// ["p", "article", "body", "html"]
679    /// ```
680    ///
681    /// # Errors
682    ///
683    /// invalidated, or [`BrowserError::ScriptExecutionFailed`] when CDP
684    pub async fn ancestors(&self) -> Result<Vec<String>> {
685        let returns = timeout(
686            self.cdp_timeout,
687            self.element.call_js_fn(
688                r"function() {
689                    const a = [];
690                    let n = this.parentElement;
691                    while (n) { a.push(n.tagName.toLowerCase()); n = n.parentElement; }
692                    return a;
693                }",
694                true,
695            ),
696        )
697        .await
698        .map_err(|_| BrowserError::Timeout {
699            operation: "NodeHandle::ancestors".to_string(),
700            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
701        })?
702        .map_err(|e| self.cdp_err_or_stale(&e, "ancestors"))?;
703
704        // With returnByValue=true and an array return, CDP delivers the value
705        // as a JSON array directly — no JSON.stringify/re-parse needed.
706        // A missing or wrong-type value indicates an unexpected CDP failure.
707        let arr = returns
708            .result
709            .value
710            .as_ref()
711            .and_then(|v| v.as_array())
712            .ok_or_else(|| BrowserError::ScriptExecutionFailed {
713                script: "NodeHandle::ancestors".to_string(),
714                reason: "CDP returned no value or a non-array value for ancestors()".to_string(),
715            })?;
716
717        arr.iter()
718            .map(|v| {
719                v.as_str().map(ToString::to_string).ok_or_else(|| {
720                    BrowserError::ScriptExecutionFailed {
721                        script: "NodeHandle::ancestors".to_string(),
722                        reason: format!("ancestor entry is not a string: {v}"),
723                    }
724                })
725            })
726            .collect()
727    }
728
729    ///
730    ///
731    ///
732    /// # Errors
733    ///
734    /// invalidated, or [`BrowserError::CdpError`] on transport failure.
735    pub async fn children_matching(&self, selector: &str) -> Result<Vec<Self>> {
736        let elements = timeout(self.cdp_timeout, self.element.find_elements(selector))
737            .await
738            .map_err(|_| BrowserError::Timeout {
739                operation: "NodeHandle::children_matching".to_string(),
740                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
741            })?
742            .map_err(|e| self.cdp_err_or_stale(&e, "children_matching"))?;
743
744        let selector_arc: Arc<str> = Arc::from(selector);
745        Ok(elements
746            .into_iter()
747            .map(|el| Self {
748                element: el,
749                selector: selector_arc.clone(),
750                cdp_timeout: self.cdp_timeout,
751                page: self.page.clone(),
752            })
753            .collect())
754    }
755
756    /// Return the immediate parent element, or `None` if this element has no
757    /// parent (i.e. it is the document root).
758    ///
759    /// Issues a single `Runtime.callFunctionOn` CDP call that temporarily tags
760    /// the parent element with a unique attribute, then resolves it via a
761    /// CSS attribute selector.
762    ///
763    /// # Errors
764    ///
765    /// Returns an error if the CDP call fails or the page handle is invalidated.
766    ///
767    /// # Example
768    ///
769    /// ```no_run
770    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
771    /// use std::time::Duration;
772    ///
773    /// # async fn run() -> stygian_browser::error::Result<()> {
774    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
775    /// let handle = pool.acquire().await?;
776    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
777    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
778    /// # let nodes = page.query_selector_all("a").await?;
779    /// if let Some(parent) = nodes[0].parent().await? {
780    ///     let html = parent.outer_html().await?;
781    ///     println!("parent: {}", &html[..html.len().min(80)]);
782    /// }
783    /// # Ok(())
784    /// # }
785    /// ```
786    pub async fn parent(&self) -> Result<Option<Self>> {
787        let attr = format!(
788            "data-stygian-t-{}",
789            ulid::Ulid::new().to_string().to_lowercase()
790        );
791        let js = format!(
792            "function() {{ \
793                var t = this.parentElement; \
794                if (!t) {{ return false; }} \
795                t.setAttribute('{attr}', '1'); \
796                return true; \
797            }}"
798        );
799        self.call_traversal(&js, &attr, "parent").await
800    }
801
802    /// Return the next element sibling, or `None` if this element is the last
803    /// child of its parent.
804    ///
805    /// Uses `nextElementSibling` (skips text/comment nodes).
806    ///
807    /// # Errors
808    ///
809    /// invalidated.
810    ///
811    /// # Example
812    ///
813    /// ```no_run
814    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
815    /// use std::time::Duration;
816    ///
817    /// # async fn run() -> stygian_browser::error::Result<()> {
818    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
819    /// let handle = pool.acquire().await?;
820    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
821    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
822    /// # let nodes = page.query_selector_all("a").await?;
823    /// if let Some(next) = nodes[0].next_sibling().await? {
824    ///     println!("next sibling: {}", next.text_content().await?);
825    /// }
826    /// # Ok(())
827    /// # }
828    /// ```
829    pub async fn next_sibling(&self) -> Result<Option<Self>> {
830        let attr = format!(
831            "data-stygian-t-{}",
832            ulid::Ulid::new().to_string().to_lowercase()
833        );
834        let js = format!(
835            "function() {{ \
836                var t = this.nextElementSibling; \
837                if (!t) {{ return false; }} \
838                t.setAttribute('{attr}', '1'); \
839                return true; \
840            }}"
841        );
842        self.call_traversal(&js, &attr, "next").await
843    }
844
845    /// Return the previous element sibling, or `None` if this element is the
846    /// first child of its parent.
847    ///
848    /// Uses `previousElementSibling` (skips text/comment nodes).
849    ///
850    /// # Errors
851    ///
852    /// invalidated.
853    ///
854    /// # Example
855    ///
856    /// ```no_run
857    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
858    /// use std::time::Duration;
859    ///
860    /// # async fn run() -> stygian_browser::error::Result<()> {
861    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
862    /// let handle = pool.acquire().await?;
863    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
864    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
865    /// # let nodes = page.query_selector_all("a").await?;
866    /// if let Some(prev) = nodes[1].previous_sibling().await? {
867    ///     println!("prev sibling: {}", prev.text_content().await?);
868    /// }
869    /// # Ok(())
870    /// # }
871    /// ```
872    pub async fn previous_sibling(&self) -> Result<Option<Self>> {
873        let attr = format!(
874            "data-stygian-t-{}",
875            ulid::Ulid::new().to_string().to_lowercase()
876        );
877        let js = format!(
878            "function() {{ \
879                var t = this.previousElementSibling; \
880                if (!t) {{ return false; }} \
881                t.setAttribute('{attr}', '1'); \
882                return true; \
883            }}"
884        );
885        self.call_traversal(&js, &attr, "prev").await
886    }
887
888    /// Shared traversal implementation used by [`parent`], [`next_sibling`],
889    /// and [`previous_sibling`].
890    ///
891    /// The caller provides a JS function that:
892    /// 1. Computes the traversal target (for example, the parent, next
893    ///    sibling, or previous sibling) and stores it in a local variable.
894    /// 2. If the target is non-null, sets a unique attribute (`attr_name`)
895    ///    on it and returns `true`.
896    /// 3. Returns `false` when the target is null (no such neighbour).
897    ///
898    /// This helper then resolves the tagged element from the document root,
899    /// removes the temporary attribute, and wraps the result in a
900    /// `NodeHandle`.
901    ///
902    /// [`parent`]: Self::parent
903    /// [`next_sibling`]: Self::next_sibling
904    /// [`previous_sibling`]: Self::previous_sibling
905    async fn call_traversal(
906        &self,
907        js_fn: &str,
908        attr_name: &str,
909        selector_suffix: &str,
910    ) -> Result<Option<Self>> {
911        // Step 1: Run the JS that tags the target element and reports null/non-null.
912        let op_tag = format!("NodeHandle::{selector_suffix}::tag");
913        let returns = timeout(self.cdp_timeout, self.element.call_js_fn(js_fn, false))
914            .await
915            .map_err(|_| BrowserError::Timeout {
916                operation: op_tag.clone(),
917                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
918            })?
919            .map_err(|e| self.cdp_err_or_stale(&e, selector_suffix))?;
920
921        // JS returns false → no such neighbour.
922        let has_target = returns
923            .result
924            .value
925            .as_ref()
926            .and_then(serde_json::Value::as_bool)
927            .unwrap_or(false);
928        if !has_target {
929            return Ok(None);
930        }
931
932        let css = format!("[{attr_name}]");
933        let op_resolve = format!("NodeHandle::{selector_suffix}::resolve");
934        let element = timeout(self.cdp_timeout, self.page.find_element(css))
935            .await
936            .map_err(|_| BrowserError::Timeout {
937                operation: op_resolve.clone(),
938                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
939            })?
940            .map_err(|e| BrowserError::CdpError {
941                operation: op_resolve,
942                message: format!("{e:?}"),
943            })?;
944
945        // is non-fatal — it leaves a harmless stale attribute in the DOM).
946        let cleanup = format!("function() {{ this.removeAttribute('{attr_name}'); }}");
947        let _ = element.call_js_fn(cleanup, false).await;
948
949        let new_selector: Arc<str> =
950            Arc::from(format!("{}::{selector_suffix}", self.selector).as_str());
951        Ok(Some(Self {
952            element,
953            selector: new_selector,
954            cdp_timeout: self.cdp_timeout,
955            page: self.page.clone(),
956        }))
957    }
958
959    /// (when the remote object reference has been invalidated) or
960    fn cdp_err_or_stale(
961        &self,
962        err: &chromiumoxide::error::CdpError,
963        operation: &str,
964    ) -> BrowserError {
965        let msg = format!("{err:?}");
966        if msg.contains("Cannot find object with id")
967            || msg.contains("context with specified id")
968            || msg.contains("Cannot find context")
969        {
970            BrowserError::StaleNode {
971                selector: self.selector.to_string(),
972            }
973        } else {
974            BrowserError::CdpError {
975                operation: operation.to_string(),
976                message: msg,
977            }
978        }
979    }
980}
981
982// ─── PageHandle ───────────────────────────────────────────────────────────────
983
/// Handle to a single browser page (tab) backed by a chromiumoxide [`Page`].
///
/// Bundles the page with the timeout applied to individual CDP calls, the
/// status code recorded for the latest navigation, and the background task
/// that services resource filtering.
///
/// # Example
///
/// ```no_run
/// use stygian_browser::{BrowserPool, BrowserConfig};
/// use stygian_browser::page::WaitUntil;
/// use std::time::Duration;
///
/// # async fn run() -> stygian_browser::error::Result<()> {
/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
/// let handle = pool.acquire().await?;
/// let mut page = handle.browser().expect("valid browser").new_page().await?;
/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
/// let html = page.content().await?;
/// drop(page); // closes the tab
/// handle.release().await;
/// # Ok(())
/// # }
/// ```
pub struct PageHandle {
    /// Underlying chromiumoxide page that all CDP calls are issued against.
    page: Page,
    /// Upper bound applied to individual CDP calls made through this handle.
    cdp_timeout: Duration,
    /// HTTP status code of the most recent main-frame navigation, or `0` if
    /// none has been captured (`status_code` reports `0` as `None`).
    last_status_code: Arc<AtomicU16>,
    /// Background task processing `Fetch.requestPaused` events. Aborted and
    /// replaced each time `set_resource_filter` is called.
    resource_filter_task: Option<tokio::task::JoinHandle<()>>,
}
1013
1014impl PageHandle {
1015    /// Wrap a raw chromiumoxide [`Page`] in a handle.
1016    pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
1017        Self {
1018            page,
1019            cdp_timeout,
1020            last_status_code: Arc::new(AtomicU16::new(0)),
1021            resource_filter_task: None,
1022        }
1023    }
1024
1025    ///
1026    /// # Errors
1027    ///
1028    /// the CDP call fails.
1029    pub async fn navigate(
1030        &mut self,
1031        url: &str,
1032        condition: WaitUntil,
1033        nav_timeout: Duration,
1034    ) -> Result<()> {
1035        self.setup_status_capture().await;
1036        timeout(
1037            nav_timeout,
1038            self.navigate_inner(url, condition, nav_timeout),
1039        )
1040        .await
1041        .map_err(|_| BrowserError::NavigationFailed {
1042            url: url.to_string(),
1043            reason: format!("navigation timed out after {nav_timeout:?}"),
1044        })?
1045    }
1046
1047    /// Reset the last status code and wire up the `Network.responseReceived`
1048    /// so that a missing network domain never blocks navigation.
1049    async fn setup_status_capture(&self) {
1050        use chromiumoxide::cdp::browser_protocol::network::{
1051            EventResponseReceived, ResourceType as NetworkResourceType,
1052        };
1053        use futures::StreamExt;
1054
1055        // Reset so a stale code is not returned if the new navigation fails
1056        self.last_status_code.store(0, Ordering::Release);
1057
1058        let page_for_listener = self.page.clone();
1059        let status_capture = Arc::clone(&self.last_status_code);
1060        match page_for_listener
1061            .event_listener::<EventResponseReceived>()
1062            .await
1063        {
1064            Ok(mut stream) => {
1065                tokio::spawn(async move {
1066                    while let Some(event) = stream.next().await {
1067                        if event.r#type == NetworkResourceType::Document {
1068                            let code = u16::try_from(event.response.status).unwrap_or(0);
1069                            if code > 0 {
1070                                status_capture.store(code, Ordering::Release);
1071                            }
1072                            break;
1073                        }
1074                    }
1075                });
1076            }
1077            Err(e) => warn!("status-code capture unavailable: {e}"),
1078        }
1079    }
1080
1081    /// described in issue #7.
1082    async fn navigate_inner(
1083        &self,
1084        url: &str,
1085        condition: WaitUntil,
1086        nav_timeout: Duration,
1087    ) -> Result<()> {
1088        use chromiumoxide::cdp::browser_protocol::page::{
1089            EventDomContentEventFired, EventLoadEventFired,
1090        };
1091        use futures::StreamExt;
1092
1093        let url_owned = url.to_string();
1094
1095        let mut dom_events = match &condition {
1096            WaitUntil::DomContentLoaded => Some(
1097                self.page
1098                    .event_listener::<EventDomContentEventFired>()
1099                    .await
1100                    .map_err(|e| BrowserError::NavigationFailed {
1101                        url: url_owned.clone(),
1102                        reason: format!("{e:?}"),
1103                    })?,
1104            ),
1105            _ => None,
1106        };
1107
1108        let mut load_events = match &condition {
1109            WaitUntil::NetworkIdle => Some(
1110                self.page
1111                    .event_listener::<EventLoadEventFired>()
1112                    .await
1113                    .map_err(|e| BrowserError::NavigationFailed {
1114                        url: url_owned.clone(),
1115                        reason: e.to_string(),
1116                    })?,
1117            ),
1118            _ => None,
1119        };
1120
1121        let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
1122            Some(self.subscribe_inflight_counter().await)
1123        } else {
1124            None
1125        };
1126
1127        self.page
1128            .goto(url)
1129            .await
1130            .map_err(|e| BrowserError::NavigationFailed {
1131                url: url_owned.clone(),
1132                reason: e.to_string(),
1133            })?;
1134
1135        match &condition {
1136            WaitUntil::DomContentLoaded => {
1137                if let Some(ref mut events) = dom_events {
1138                    let _ = events.next().await;
1139                }
1140            }
1141            WaitUntil::NetworkIdle => {
1142                if let Some(ref mut events) = load_events {
1143                    let _ = events.next().await;
1144                }
1145                if let Some(ref counter) = inflight {
1146                    Self::wait_network_idle(counter).await;
1147                }
1148            }
1149            WaitUntil::Selector(css) => {
1150                self.wait_for_selector(css, nav_timeout).await?;
1151            }
1152        }
1153        Ok(())
1154    }
1155
1156    /// Spawn three detached tasks that maintain a signed in-flight request
1157    /// counter via `Network.requestWillBeSent` (+1) and
1158    /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
1159    async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
1160        use std::sync::atomic::AtomicI32;
1161
1162        use chromiumoxide::cdp::browser_protocol::network::{
1163            EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
1164        };
1165        use futures::StreamExt;
1166
1167        let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
1168        let pairs: [(Arc<AtomicI32>, i32); 3] = [
1169            (Arc::clone(&counter), 1),
1170            (Arc::clone(&counter), -1),
1171            (Arc::clone(&counter), -1),
1172        ];
1173        let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
1174
1175        macro_rules! spawn_tracker {
1176            ($page:expr, $event:ty, $c:expr, $delta:expr) => {
1177                match $page.event_listener::<$event>().await {
1178                    Ok(mut s) => {
1179                        let c = $c;
1180                        let d = $delta;
1181                        tokio::spawn(async move {
1182                            while s.next().await.is_some() {
1183                                c.fetch_add(d, Ordering::Relaxed);
1184                            }
1185                        });
1186                    }
1187                    Err(e) => warn!("network-idle tracker unavailable: {e}"),
1188                }
1189            };
1190        }
1191
1192        let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
1193        spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
1194        spawn_tracker!(p2, EventLoadingFinished, c2, d2);
1195        spawn_tracker!(p3, EventLoadingFailed, c3, d3);
1196
1197        counter
1198    }
1199
1200    async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
1201        const IDLE_THRESHOLD: i32 = 2;
1202        const SETTLE: Duration = Duration::from_millis(500);
1203        loop {
1204            if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
1205                tokio::time::sleep(SETTLE).await;
1206                if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
1207                    break;
1208                }
1209            } else {
1210                tokio::time::sleep(Duration::from_millis(50)).await;
1211            }
1212        }
1213    }
1214
1215    ///
1216    /// # Errors
1217    ///
1218    /// within the given timeout.
1219    pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
1220        let selector_owned = selector.to_string();
1221        let poll = async {
1222            loop {
1223                if self.page.find_element(selector_owned.clone()).await.is_ok() {
1224                    return Ok(());
1225                }
1226                tokio::time::sleep(Duration::from_millis(100)).await;
1227            }
1228        };
1229
1230        timeout(wait_timeout, poll)
1231            .await
1232            .map_err(|_| BrowserError::NavigationFailed {
1233                url: String::new(),
1234                reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
1235            })?
1236    }
1237
1238    ///
1239    /// Enables `Fetch` interception and spawns a background task that continues
1240    /// allowed requests and fails blocked ones with `BlockedByClient`. Any
1241    /// previously set filter task is cancelled first.
1242    ///
1243    /// # Errors
1244    ///
1245    pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
1246        use chromiumoxide::cdp::browser_protocol::fetch::{
1247            ContinueRequestParams, EnableParams, EventRequestPaused, FailRequestParams,
1248            RequestPattern,
1249        };
1250        use chromiumoxide::cdp::browser_protocol::network::ErrorReason;
1251        use futures::StreamExt as _;
1252
1253        if filter.is_empty() {
1254            return Ok(());
1255        }
1256
1257        // Cancel any previously running filter task.
1258        if let Some(task) = self.resource_filter_task.take() {
1259            task.abort();
1260        }
1261
1262        let pattern = RequestPattern::builder().url_pattern("*").build();
1263        let params = EnableParams::builder()
1264            .patterns(vec![pattern])
1265            .handle_auth_requests(false)
1266            .build();
1267
1268        timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
1269            .await
1270            .map_err(|_| BrowserError::Timeout {
1271                operation: "Fetch.enable".to_string(),
1272                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1273            })?
1274            .map_err(|e| BrowserError::CdpError {
1275                operation: "Fetch.enable".to_string(),
1276                message: e.to_string(),
1277            })?;
1278
1279        // is never blocked. Without this handler Chrome holds every intercepted
1280        // request indefinitely and the page hangs.
1281        let mut events = self
1282            .page
1283            .event_listener::<EventRequestPaused>()
1284            .await
1285            .map_err(|e| BrowserError::CdpError {
1286                operation: "Fetch.requestPaused subscribe".to_string(),
1287                message: e.to_string(),
1288            })?;
1289
1290        let page = self.page.clone();
1291        debug!("Resource filter active: {:?}", filter);
1292        let task = tokio::spawn(async move {
1293            while let Some(event) = events.next().await {
1294                let request_id = event.request_id.clone();
1295                if filter.should_block(event.resource_type.as_ref()) {
1296                    let params = FailRequestParams::new(request_id, ErrorReason::BlockedByClient);
1297                    let _ = page.execute(params).await;
1298                } else {
1299                    let _ = page.execute(ContinueRequestParams::new(request_id)).await;
1300                }
1301            }
1302        });
1303
1304        self.resource_filter_task = Some(task);
1305        Ok(())
1306    }
1307
1308    /// Return the current page URL (post-navigation, post-redirect).
1309    ///
1310    /// internally by [`save_cookies`](Self::save_cookies); no extra network
1311    /// request is made.  Returns an empty string if the URL is not yet set
1312    ///
1313    /// # Errors
1314    ///
1315    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
1316    ///
1317    /// # Example
1318    ///
1319    /// ```no_run
1320    /// use stygian_browser::{BrowserPool, BrowserConfig};
1321    /// use stygian_browser::page::WaitUntil;
1322    /// use std::time::Duration;
1323    ///
1324    /// # async fn run() -> stygian_browser::error::Result<()> {
1325    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1326    /// let handle = pool.acquire().await?;
1327    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1328    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
1329    /// let url = page.url().await?;
1330    /// println!("Final URL after redirects: {url}");
1331    /// # Ok(())
1332    /// # }
1333    /// ```
1334    pub async fn url(&self) -> Result<String> {
1335        timeout(self.cdp_timeout, self.page.url())
1336            .await
1337            .map_err(|_| BrowserError::Timeout {
1338                operation: "page.url".to_string(),
1339                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1340            })?
1341            .map_err(|e| BrowserError::CdpError {
1342                operation: "page.url".to_string(),
1343                message: e.to_string(),
1344            })
1345            .map(Option::unwrap_or_default)
1346    }
1347
1348    /// Return the HTTP status code of the most recent main-frame navigation.
1349    ///
1350    /// The status is captured from the `Network.responseReceived` CDP event
1351    /// wired up inside [`navigate`](Self::navigate), so it reflects the
1352    /// *final* response after any server-side redirects.
1353    ///
1354    /// navigations, when [`navigate`](Self::navigate) has not yet been called,
1355    /// or if the network event subscription failed.
1356    ///
1357    /// # Errors
1358    ///
1359    ///
1360    /// # Example
1361    ///
1362    /// ```no_run
1363    /// use stygian_browser::{BrowserPool, BrowserConfig};
1364    /// use stygian_browser::page::WaitUntil;
1365    /// use std::time::Duration;
1366    ///
1367    /// # async fn run() -> stygian_browser::error::Result<()> {
1368    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1369    /// let handle = pool.acquire().await?;
1370    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1371    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
1372    /// if let Some(code) = page.status_code()? {
1373    ///     println!("HTTP {code}");
1374    /// }
1375    /// # Ok(())
1376    /// # }
1377    /// ```
1378    pub fn status_code(&self) -> Result<Option<u16>> {
1379        let code = self.last_status_code.load(Ordering::Acquire);
1380        Ok(if code == 0 { None } else { Some(code) })
1381    }
1382
1383    /// Return the page's `<title>` text.
1384    ///
1385    /// # Errors
1386    ///
1387    pub async fn title(&self) -> Result<String> {
1388        timeout(self.cdp_timeout, self.page.get_title())
1389            .await
1390            .map_err(|_| BrowserError::Timeout {
1391                operation: "get_title".to_string(),
1392                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1393            })?
1394            .map_err(|e| BrowserError::ScriptExecutionFailed {
1395                script: "document.title".to_string(),
1396                reason: e.to_string(),
1397            })
1398            .map(Option::unwrap_or_default)
1399    }
1400
1401    /// Return the page's full outer HTML.
1402    ///
1403    /// # Errors
1404    ///
1405    pub async fn content(&self) -> Result<String> {
1406        timeout(self.cdp_timeout, self.page.content())
1407            .await
1408            .map_err(|_| BrowserError::Timeout {
1409                operation: "page.content".to_string(),
1410                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1411            })?
1412            .map_err(|e| BrowserError::ScriptExecutionFailed {
1413                script: "document.documentElement.outerHTML".to_string(),
1414                reason: e.to_string(),
1415            })
1416    }
1417
1418    /// lightweight [`NodeHandle`]s backed by CDP `RemoteObjectId`s.
1419    ///
1420    /// No HTML serialisation occurs — the browser's in-memory DOM is queried
1421    /// directly over the CDP connection, eliminating the `page.content()` +
1422    /// `scraper::Html::parse_document` round-trip.
1423    ///
1424    ///
1425    /// # Errors
1426    ///
1427    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
1428    ///
1429    /// # Example
1430    ///
1431    /// ```no_run
1432    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1433    /// use std::time::Duration;
1434    ///
1435    /// # async fn run() -> stygian_browser::error::Result<()> {
1436    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1437    /// let handle = pool.acquire().await?;
1438    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1439    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
1440    /// # let nodes = page.query_selector_all("div[data-ux]").await?;
1441    /// # for node in &nodes {
1442    ///     let ux_type = node.attr("data-ux").await?;
1443    ///     let text    = node.text_content().await?;
1444    ///     println!("{ux_type:?}: {text}");
1445    /// # }
1446    /// # Ok(())
1447    /// # }
1448    /// ```
1449    pub async fn query_selector_all(&self, selector: &str) -> Result<Vec<NodeHandle>> {
1450        let elements = timeout(self.cdp_timeout, self.page.find_elements(selector))
1451            .await
1452            .map_err(|_| BrowserError::Timeout {
1453                operation: "PageHandle::query_selector_all".to_string(),
1454                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1455            })?
1456            .map_err(|e| BrowserError::CdpError {
1457                operation: "PageHandle::query_selector_all".to_string(),
1458                message: e.to_string(),
1459            })?;
1460
1461        let selector_arc: Arc<str> = Arc::from(selector);
1462        Ok(elements
1463            .into_iter()
1464            .map(|el| NodeHandle {
1465                element: el,
1466                selector: selector_arc.clone(),
1467                cdp_timeout: self.cdp_timeout,
1468                page: self.page.clone(),
1469            })
1470            .collect())
1471    }
1472
1473    /// Evaluate arbitrary JavaScript and return the result as `T`.
1474    ///
1475    /// # Errors
1476    ///
1477    /// deserialization error.
1478    pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
1479        let script_owned = script.to_string();
1480        timeout(self.cdp_timeout, self.page.evaluate(script))
1481            .await
1482            .map_err(|_| BrowserError::Timeout {
1483                operation: "page.evaluate".to_string(),
1484                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1485            })?
1486            .map_err(|e| BrowserError::ScriptExecutionFailed {
1487                script: script_owned.clone(),
1488                reason: e.to_string(),
1489            })?
1490            .into_value::<T>()
1491            .map_err(|e| BrowserError::ScriptExecutionFailed {
1492                script: script_owned,
1493                reason: e.to_string(),
1494            })
1495    }
1496
1497    ///
1498    /// # Errors
1499    ///
1500    pub async fn save_cookies(
1501        &self,
1502    ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
1503        use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
1504
1505        let url = self
1506            .page
1507            .url()
1508            .await
1509            .map_err(|e| BrowserError::CdpError {
1510                operation: "page.url".to_string(),
1511                message: e.to_string(),
1512            })?
1513            .unwrap_or_default();
1514
1515        timeout(
1516            self.cdp_timeout,
1517            self.page
1518                .execute(GetCookiesParams::builder().urls(vec![url]).build()),
1519        )
1520        .await
1521        .map_err(|_| BrowserError::Timeout {
1522            operation: "Network.getCookies".to_string(),
1523            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1524        })?
1525        .map_err(|e| BrowserError::CdpError {
1526            operation: "Network.getCookies".to_string(),
1527            message: e.to_string(),
1528        })
1529        .map(|r| r.cookies.clone())
1530    }
1531
1532    ///
1533    /// [`SessionSnapshot`][crate::session::SessionSnapshot] and without
1534    /// requiring a direct `chromiumoxide` dependency in calling code.
1535    ///
1536    /// Individual cookie failures are logged as warnings and do not abort the
1537    /// remaining cookies.
1538    ///
1539    /// # Errors
1540    ///
1541    /// call exceeds `cdp_timeout`.
1542    ///
1543    /// # Example
1544    ///
1545    /// ```no_run
1546    /// use stygian_browser::{BrowserPool, BrowserConfig};
1547    /// use stygian_browser::session::SessionCookie;
1548    /// use std::time::Duration;
1549    ///
1550    /// # async fn run() -> stygian_browser::error::Result<()> {
1551    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1552    /// let handle = pool.acquire().await?;
1553    /// let page = handle.browser().expect("valid browser").new_page().await?;
1554    /// let cookies = vec![SessionCookie {
1555    ///     name: "session".to_string(),
1556    ///     value: "abc123".to_string(),
1557    ///     domain: ".example.com".to_string(),
1558    ///     path: "/".to_string(),
1559    ///     expires: -1.0,
1560    ///     http_only: true,
1561    ///     secure: true,
1562    ///     same_site: "Lax".to_string(),
1563    /// }];
1564    /// page.inject_cookies(&cookies).await?;
1565    /// # Ok(())
1566    /// # }
1567    /// ```
1568    pub async fn inject_cookies(&self, cookies: &[crate::session::SessionCookie]) -> Result<()> {
1569        use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
1570
1571        for cookie in cookies {
1572            let params = match SetCookieParams::builder()
1573                .name(cookie.name.clone())
1574                .value(cookie.value.clone())
1575                .domain(cookie.domain.clone())
1576                .path(cookie.path.clone())
1577                .http_only(cookie.http_only)
1578                .secure(cookie.secure)
1579                .build()
1580            {
1581                Ok(p) => p,
1582                Err(e) => {
1583                    warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
1584                    continue;
1585                }
1586            };
1587
1588            match timeout(self.cdp_timeout, self.page.execute(params)).await {
1589                Err(_) => {
1590                    warn!(
1591                        cookie = %cookie.name,
1592                        timeout_ms = self.cdp_timeout.as_millis(),
1593                        "Timed out injecting cookie"
1594                    );
1595                }
1596                Ok(Err(e)) => {
1597                    warn!(cookie = %cookie.name, error = %e, "Failed to inject cookie");
1598                }
1599                Ok(Ok(_)) => {}
1600            }
1601        }
1602
1603        debug!(count = cookies.len(), "Cookies injected");
1604        Ok(())
1605    }
1606
1607    /// Capture a screenshot of the current page as PNG bytes.
1608    ///
1609    /// them in-memory.
1610    ///
1611    /// # Errors
1612    ///
1613    /// command fails, or [`BrowserError::Timeout`] if it exceeds
1614    /// `cdp_timeout`.
1615    ///
1616    /// # Example
1617    ///
1618    /// ```no_run
1619    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1620    /// use std::{time::Duration, fs};
1621    ///
1622    /// # async fn run() -> stygian_browser::error::Result<()> {
1623    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1624    /// let handle = pool.acquire().await?;
1625    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1626    /// let png = page.screenshot().await?;
1627    /// fs::write("screenshot.png", &png).unwrap();
1628    /// # Ok(())
1629    /// # }
1630    /// ```
1631    pub async fn screenshot(&self) -> Result<Vec<u8>> {
1632        use chromiumoxide::page::ScreenshotParams;
1633
1634        let params = ScreenshotParams::builder().full_page(true).build();
1635
1636        timeout(self.cdp_timeout, self.page.screenshot(params))
1637            .await
1638            .map_err(|_| BrowserError::Timeout {
1639                operation: "Page.captureScreenshot".to_string(),
1640                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1641            })?
1642            .map_err(|e| BrowserError::CdpError {
1643                operation: "Page.captureScreenshot".to_string(),
1644                message: e.to_string(),
1645            })
1646    }
1647
1648    /// Borrow the underlying chromiumoxide [`Page`].
1649    #[must_use]
1650    pub const fn inner(&self) -> &Page {
1651        &self.page
1652    }
1653
1654    /// Close this page (tab).
1655    ///
1656    /// # Errors
1657    ///
1658    /// Returns [`BrowserError::Timeout`] when the close call does not
1659    /// complete within the 5-second timeout, and
1660    /// [`BrowserError::CdpError`] for underlying chromiumoxide failures
1661    /// while issuing the `Page.close` CDP command.
1662    pub async fn close(self) -> Result<()> {
1663        timeout(Duration::from_secs(5), self.page.clone().close())
1664            .await
1665            .map_err(|_| BrowserError::Timeout {
1666                operation: "page.close".to_string(),
1667                duration_ms: 5000,
1668            })?
1669            .map_err(|e| BrowserError::CdpError {
1670                operation: "page.close".to_string(),
1671                message: e.to_string(),
1672            })
1673    }
1674}
1675
1676// ─── Stealth diagnostics ──────────────────────────────────────────────────────
1677
1678#[cfg(feature = "stealth")]
1679impl PageHandle {
1680    /// Run all built-in stealth detection checks against the current page.
1681    ///
1682    /// Iterates [`crate::diagnostic::all_checks`], evaluates each check's
1683    /// JavaScript via CDP `Runtime.evaluate`, and returns an aggregate
1684    /// [`crate::diagnostic::DiagnosticReport`].
1685    ///
1686    /// recorded as failing checks and do **not** abort the whole run.
1687    ///
1688    /// # Errors
1689    ///
1690    /// Individual check failures are captured in the report.
1691    ///
1692    /// # Example
1693    ///
1694    /// ```no_run
1695    /// # async fn run() -> stygian_browser::error::Result<()> {
1696    /// use stygian_browser::{BrowserPool, BrowserConfig};
1697    /// use stygian_browser::page::WaitUntil;
1698    /// use std::time::Duration;
1699    ///
1700    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1701    /// let handle = pool.acquire().await?;
1702    /// let browser = handle.browser().expect("valid browser");
1703    /// let mut page = browser.new_page().await?;
1704    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(10)).await?;
1705    ///
1706    /// let report = page.verify_stealth().await?;
1707    /// println!("Stealth: {}/{} checks passed", report.passed_count, report.checks.len());
1708    /// # for failure in report.failures() {
1709    ///     eprintln!("  FAIL  {}: {}", failure.description, failure.details);
1710    /// # }
1711    /// # Ok(())
1712    /// # }
1713    /// ```
1714    pub async fn verify_stealth(&self) -> Result<crate::diagnostic::DiagnosticReport> {
1715        use crate::diagnostic::{CheckResult, DiagnosticReport, all_checks, all_limitation_probes};
1716
1717        let mut results: Vec<CheckResult> = Vec::new();
1718        let mut known_limitations = Vec::new();
1719
1720        for check in all_checks() {
1721            let result = match self.eval::<String>(check.script).await {
1722                Ok(json) => check.parse_output(&json),
1723                Err(e) => {
1724                    tracing::warn!(
1725                        check = ?check.id,
1726                        error = %e,
1727                        "stealth check script failed during evaluation"
1728                    );
1729                    CheckResult {
1730                        id: check.id,
1731                        description: check.description.to_string(),
1732                        passed: false,
1733                        details: format!("script error: {e}"),
1734                    }
1735                }
1736            };
1737            tracing::debug!(
1738                check = ?result.id,
1739                passed = result.passed,
1740                details = %result.details,
1741                "stealth check result"
1742            );
1743            results.push(result);
1744        }
1745
1746        for probe in all_limitation_probes() {
1747            let limitation = match self.eval::<String>(probe.script).await {
1748                Ok(json) => probe.parse_output(&json),
1749                Err(error) => Some(crate::diagnostic::KnownLimitation {
1750                    id: probe.id,
1751                    description: probe.description.to_string(),
1752                    details: format!("script error: {error}"),
1753                }),
1754            };
1755            if let Some(limitation) = limitation {
1756                tracing::debug!(
1757                    limitation = ?limitation.id,
1758                    details = %limitation.details,
1759                    "stealth limitation observed"
1760                );
1761                known_limitations.push(limitation);
1762            }
1763        }
1764
1765        Ok(DiagnosticReport::new(results).with_known_limitations(known_limitations))
1766    }
1767
1768    /// Run stealth checks and attach transport diagnostics (JA3/JA4/HTTP3).
1769    ///
1770    /// # Errors
1771    ///
1772    /// Propagates any [`BrowserError`] returned by the inner
1773    /// [`Self::verify_stealth`] call (which surfaces CDP / selector /
1774    /// evaluation failures from the underlying stealth probe). The
1775    /// `navigator.userAgent` read uses `eval` and is best-effort — its
1776    /// failure is logged and downgraded to an empty string so the
1777    /// transport-diagnostic block can still be attached.
1778    pub async fn verify_stealth_with_transport(
1779        &self,
1780        observed: Option<crate::diagnostic::TransportObservations>,
1781    ) -> Result<crate::diagnostic::DiagnosticReport> {
1782        let report = self.verify_stealth().await?;
1783
1784        let user_agent = match self.eval::<String>("navigator.userAgent").await {
1785            Ok(ua) => ua,
1786            Err(e) => {
1787                tracing::warn!(error = %e, "failed to read navigator.userAgent for transport diagnostics");
1788                String::new()
1789            }
1790        };
1791
1792        let transport = crate::diagnostic::TransportDiagnostic::from_user_agent_and_observations(
1793            &user_agent,
1794            observed.as_ref(),
1795        );
1796
1797        Ok(report.with_transport(transport))
1798    }
1799}
1800
1801// ─── extract feature ─────────────────────────────────────────────────────────
1802
1803#[cfg(feature = "extract")]
1804impl PageHandle {
1805    ///
1806    ///
1807    /// All per-node extractions are driven concurrently via
1808    /// [`futures::future::try_join_all`].
1809    ///
1810    /// # Errors
1811    ///
1812    /// fails, or [`BrowserError::ExtractionFailed`] if any field extraction
1813    /// fails.
1814    ///
1815    /// # Example
1816    ///
1817    /// ```ignore
1818    /// use stygian_browser::extract::Extract;
1819    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
1820    /// use std::time::Duration;
1821    ///
1822    /// #[derive(Extract)]
1823    /// struct Link {
1824    ///     href: Option<String>,
1825    /// }
1826    ///
1827    /// # async fn run() -> stygian_browser::error::Result<()> {
1828    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
1829    /// let handle = pool.acquire().await?;
1830    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
1831    /// page.navigate(
1832    ///     "https://example.com",
1833    ///     WaitUntil::DomContentLoaded,
1834    ///     Duration::from_secs(30),
1835    /// ).await?;
1836    /// let links: Vec<Link> = page.extract_all::<Link>("nav li").await?;
1837    /// # Ok(())
1838    /// # }
1839    /// ```
1840    pub async fn extract_all<T>(&self, selector: &str) -> Result<Vec<T>>
1841    where
1842        T: crate::extract::Extractable,
1843    {
1844        use futures::future::try_join_all;
1845
1846        let nodes = self.query_selector_all(selector).await?;
1847        try_join_all(nodes.iter().map(|n| T::extract_from(n)))
1848            .await
1849            .map_err(BrowserError::ExtractionFailed)
1850    }
1851
1852    /// Try each selector in `selectors` in order and return the extracted
1853    /// results from the **first** selector that matches at least one node.
1854    ///
1855    /// This is useful when a page may use different markup across versions or
1856    /// A/B variants — supply the preferred selector first and progressively
1857    /// wider fallbacks afterwards.
1858    ///
1859    /// Returns an empty `Vec` only when *all* selectors match zero nodes
1860    /// (i.e. the element is genuinely absent from the page).  A non-empty
1861    /// intermediate selector result that then fails during extraction **will**
1862    /// return an error.
1863    ///
1864    /// # Errors
1865    ///
1866    /// Returns [`BrowserError::CdpError`] if the selector query fails, or
1867    /// [`BrowserError::ExtractionFailed`] if a matched node fails extraction.
1868    ///
1869    /// # Example
1870    ///
1871    /// ```ignore
1872    /// use stygian_browser::extract::Extract;
1873    ///
1874    /// #[derive(Extract)]
1875    /// struct Headline { title: String }
1876    ///
1877    /// # async fn run(page: &stygian_browser::PageHandle) -> stygian_browser::error::Result<()> {
1878    /// // Try modern selector first, fall back to legacy markup.
1879    /// let items = page
1880    ///     .extract_all_with_fallback::<Headline>(&["h2.headline", "h2.title", "h2"])
1881    ///     .await?;
1882    /// # Ok(())
1883    /// # }
1884    /// ```
1885    pub async fn extract_all_with_fallback<T>(&self, selectors: &[&str]) -> Result<Vec<T>>
1886    where
1887        T: crate::extract::Extractable,
1888    {
1889        use futures::future::try_join_all;
1890
1891        for &selector in selectors {
1892            let nodes = self.query_selector_all(selector).await?;
1893            if nodes.is_empty() {
1894                continue;
1895            }
1896            return try_join_all(nodes.iter().map(|n| T::extract_from(n)))
1897                .await
1898                .map_err(BrowserError::ExtractionFailed);
1899        }
1900
1901        Ok(vec![])
1902    }
1903
1904    /// Extract from every node matching `selector`, **skipping** nodes where
1905    /// a required field is absent (i.e. [`ExtractionError::Missing`]).
1906    ///
1907    /// Unlike [`extract_all`], this method is lenient about structural
1908    /// mismatches: nodes that fail with [`ExtractionError::Missing`] are
1909    /// silently dropped from the result set.  All other extraction errors
1910    /// (CDP failures, stale nodes, nested errors) still propagate as hard
1911    /// failures.
1912    ///
1913    /// This is useful when scraping heterogeneous lists where some items
1914    /// lack an optional field that your struct treats as required.
1915    ///
1916    /// [`extract_all`]: Self::extract_all
1917    /// [`ExtractionError::Missing`]: crate::extract::ExtractionError::Missing
1918    ///
1919    /// # Errors
1920    ///
1921    /// Returns [`BrowserError::CdpError`] if the selector query fails, or
1922    /// [`BrowserError::ExtractionFailed`] for non-`Missing` extraction errors.
1923    ///
1924    /// # Example
1925    ///
1926    /// ```ignore
1927    /// use stygian_browser::extract::Extract;
1928    ///
1929    /// #[derive(Extract)]
1930    /// struct Price { amount: String }
1931    ///
1932    /// # async fn run(page: &stygian_browser::PageHandle) -> stygian_browser::error::Result<()> {
1933    /// // Products without a price tag are silently skipped.
1934    /// let prices = page.extract_resilient::<Price>(".product").await?;
1935    /// # Ok(())
1936    /// # }
1937    /// ```
1938    pub async fn extract_resilient<T>(&self, selector: &str) -> Result<Vec<T>>
1939    where
1940        T: crate::extract::Extractable,
1941    {
1942        use crate::extract::ExtractionError;
1943
1944        let nodes = self.query_selector_all(selector).await?;
1945        let mut results = Vec::with_capacity(nodes.len());
1946
1947        for node in &nodes {
1948            match T::extract_from(node).await {
1949                Ok(item) => results.push(item),
1950                Err(ExtractionError::Missing { .. }) => {
1951                    tracing::debug!(
1952                        selector,
1953                        "extract_resilient: skipping node with missing required field"
1954                    );
1955                }
1956                Err(e) => return Err(BrowserError::ExtractionFailed(e)),
1957            }
1958        }
1959
1960        Ok(results)
1961    }
1962}
1963
1964// ─── similarity feature ──────────────────────────────────────────────────────
1965
#[cfg(feature = "similarity")]
impl NodeHandle {
    /// Compute a structural [`crate::similarity::ElementFingerprint`] for
    /// this node.
    ///
    /// A single JS function is called on the element.  It gathers the
    /// lower-cased tag name, the sorted class list, the sorted attribute
    /// names (excluding `class` and `id`), and the number of ancestors
    /// between the element and `<body>`, and returns them as one JSON
    /// string, so the whole fingerprint costs one round-trip.
    ///
    /// # Errors
    ///
    /// Returns [`BrowserError::Timeout`] if the call exceeds `cdp_timeout`,
    /// the error produced by `cdp_err_or_stale` if the call itself fails
    /// (e.g. the node has been invalidated), or
    /// [`BrowserError::ScriptExecutionFailed`] if the script produces
    /// unexpected output.
    pub async fn fingerprint(&self) -> Result<crate::similarity::ElementFingerprint> {
        const JS: &str = r"function() {
    var el = this;
    var tag = el.tagName.toLowerCase();
    var classes = Array.prototype.slice.call(el.classList).sort();
    var attrNames = Array.prototype.slice.call(el.attributes)
        .map(function(a) { return a.name; })
        .filter(function(n) { return n !== 'class' && n !== 'id'; })
        .sort();
    var depth = 0;
    var n = el.parentElement;
    while (n && n.tagName.toLowerCase() !== 'body') { depth++; n = n.parentElement; }
    return JSON.stringify({ tag: tag, classes: classes, attrNames: attrNames, depth: depth });
}";

        let outcome =
            tokio::time::timeout(self.cdp_timeout, self.element.call_js_fn(JS, true)).await;

        let returns = match outcome {
            Err(_) => {
                return Err(BrowserError::Timeout {
                    operation: "NodeHandle::fingerprint".to_string(),
                    duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
                });
            }
            Ok(Err(e)) => return Err(self.cdp_err_or_stale(&e, "fingerprint")),
            Ok(Ok(returns)) => returns,
        };

        // The script returns `JSON.stringify(...)`, so a string is expected.
        let json_str = match returns.result.value.as_ref().and_then(|v| v.as_str()) {
            Some(text) => text,
            None => {
                return Err(BrowserError::ScriptExecutionFailed {
                    script: "NodeHandle::fingerprint".to_string(),
                    reason: "CDP returned no string value from fingerprint script".to_string(),
                });
            }
        };

        match serde_json::from_str::<crate::similarity::ElementFingerprint>(json_str) {
            Ok(fingerprint) => Ok(fingerprint),
            Err(e) => Err(BrowserError::ScriptExecutionFailed {
                script: "NodeHandle::fingerprint".to_string(),
                reason: format!("failed to deserialise fingerprint JSON: {e}"),
            }),
        }
    }
}
2018
#[cfg(feature = "similarity")]
impl PageHandle {
    /// Find elements on the page that are structurally similar to
    /// `reference`, filtered and capped by
    /// [`crate::similarity::SimilarityConfig`].
    ///
    /// The reference node is fingerprinted first (via
    /// [`NodeHandle::fingerprint`]), then every candidate returned by
    /// `query_selector_all("*")` is fingerprinted in turn.  A candidate is
    /// kept when its [`crate::similarity::jaccard_weighted`] score is at
    /// least `config.threshold`.  Candidates whose fingerprint call fails
    /// are skipped.  Results are ordered by score descending and, when
    /// `config.max_results` is greater than zero, truncated to that many
    /// entries.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
    /// use stygian_browser::similarity::SimilarityConfig;
    /// use std::time::Duration;
    ///
    /// # async fn run() -> stygian_browser::error::Result<()> {
    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
    /// let handle = pool.acquire().await?;
    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
    ///
    /// let nodes = page.query_selector_all("h1").await?;
    /// let reference = nodes.into_iter().next().ok_or(stygian_browser::error::BrowserError::StaleNode { selector: "h1".to_string() })?;
    /// let similar = page.find_similar(&reference, SimilarityConfig::default()).await?;
    /// for m in &similar {
    ///     println!("score={:.2}", m.score);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    ///
    /// # Errors
    ///
    /// Propagates the error from fingerprinting `reference` (for example
    /// [`BrowserError::ScriptExecutionFailed`]) and the error from the
    /// candidate selector query.
    pub async fn find_similar(
        &self,
        reference: &NodeHandle,
        config: crate::similarity::SimilarityConfig,
    ) -> Result<Vec<crate::similarity::SimilarMatch>> {
        use crate::similarity::{SimilarMatch, jaccard_weighted};
        use std::cmp::Ordering;

        let reference_fp = reference.fingerprint().await?;

        let mut matches: Vec<SimilarMatch> = Vec::new();
        for node in self.query_selector_all("*").await? {
            // Stale / detached nodes are silently skipped.
            let candidate_fp = match node.fingerprint().await {
                Ok(fp) => fp,
                Err(_) => continue,
            };
            let score = jaccard_weighted(&reference_fp, &candidate_fp);
            if score >= config.threshold {
                matches.push(SimilarMatch { node, score });
            }
        }

        // Highest score first; incomparable scores are treated as equal.
        matches.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(Ordering::Equal));

        // `max_results == 0` means "no cap".
        if config.max_results > 0 {
            matches.truncate(config.max_results);
        }

        Ok(matches)
    }
}
2087
2088impl Drop for PageHandle {
2089    fn drop(&mut self) {
2090        warn!("PageHandle dropped without explicit close(); spawning cleanup task");
2091        // chromiumoxide Page does not implement close on Drop, so we spawn
2092        // swap it out. We clone the Page handle (it's Arc-backed internally).
2093        let page = self.page.clone();
2094        tokio::spawn(async move {
2095            let _ = page.close().await;
2096        });
2097    }
2098}
2099
2100// ─── Session warmup & refresh ─────────────────────────────────────────────────
2101
2102/// Simplified, JSON-serializable wait strategy used in [`WarmupOptions`] and
2103/// [`RefreshOptions`].
2104///
2105/// This is a serialization-friendly analogue of [`WaitUntil`].  Use
2106/// [`WarmupWait::into_wait_until`] to convert before calling
2107/// [`PageHandle::navigate`].
2108#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
2109#[serde(rename_all = "snake_case")]
2110pub enum WarmupWait {
2111    /// Wait until the HTML is fully parsed (`DOMContentLoaded`).  This is the
2112    /// default and works for most pages.
2113    #[default]
2114    DomContentLoaded,
2115    /// Wait until there are no more than two in-flight network requests for at
2116    /// least 500 ms after navigation.
2117    NetworkIdle,
2118}
2119
2120impl WarmupWait {
2121    /// Convert into the lower-level [`WaitUntil`] enum.
2122    #[must_use]
2123    pub const fn into_wait_until(self) -> WaitUntil {
2124        match self {
2125            Self::DomContentLoaded => WaitUntil::DomContentLoaded,
2126            Self::NetworkIdle => WaitUntil::NetworkIdle,
2127        }
2128    }
2129}
2130
2131/// Options for [`PageHandle::warmup`].
2132///
2133/// # Example
2134///
2135/// ```
2136/// use stygian_browser::page::{WarmupOptions, WarmupWait};
2137///
2138/// let opts = WarmupOptions {
2139///     url: "https://example.com".to_string(),
2140///     wait: WarmupWait::DomContentLoaded,
2141///     timeout_ms: 30_000,
2142///     stabilize_ms: 500,
2143/// };
2144/// assert_eq!(opts.timeout_ms, 30_000);
2145/// ```
2146#[derive(Debug, Clone, Serialize, Deserialize)]
2147pub struct WarmupOptions {
2148    /// The URL to navigate to during warmup.
2149    pub url: String,
2150    /// Wait strategy applied after the navigation commit (default:
2151    /// `DomContentLoaded`).
2152    #[serde(default)]
2153    pub wait: WarmupWait,
2154    /// Navigation timeout in milliseconds.  Default: `30 000`.
2155    #[serde(default = "WarmupOptions::default_timeout_ms")]
2156    pub timeout_ms: u64,
2157    /// Additional pause after navigation to let dynamic resources (XHR,
2158    /// lazy-loaded images) settle, in milliseconds.  `0` disables the
2159    /// stabilization step (default).
2160    #[serde(default)]
2161    pub stabilize_ms: u64,
2162}
2163
2164impl WarmupOptions {
2165    /// Returns the default navigation timeout (30 000 ms).
2166    #[must_use]
2167    pub const fn default_timeout_ms() -> u64 {
2168        30_000
2169    }
2170}
2171
2172impl Default for WarmupOptions {
2173    fn default() -> Self {
2174        Self {
2175            url: String::new(),
2176            wait: WarmupWait::DomContentLoaded,
2177            timeout_ms: Self::default_timeout_ms(),
2178            stabilize_ms: 0,
2179        }
2180    }
2181}
2182
2183/// Diagnostic report produced by [`PageHandle::warmup`].
2184///
2185/// # Example
2186///
2187/// ```
2188/// use stygian_browser::page::WarmupReport;
2189/// let report = WarmupReport {
2190///     url: "https://example.com".to_string(),
2191///     elapsed_ms: 250,
2192///     status_code: Some(200),
2193///     title: "Example Domain".to_string(),
2194///     stabilized: false,
2195/// };
2196/// assert_eq!(report.status_code, Some(200));
2197/// ```
2198#[derive(Debug, Clone, Serialize, Deserialize)]
2199pub struct WarmupReport {
2200    /// The URL that was warmed.
2201    pub url: String,
2202    /// Elapsed wall-time in milliseconds.
2203    pub elapsed_ms: u64,
2204    /// HTTP status code of the warmup navigation, if captured by the
2205    /// `Network.responseReceived` listener.
2206    pub status_code: Option<u16>,
2207    /// Page title after warmup navigation.
2208    pub title: String,
2209    /// Whether a stabilization pause (`stabilize_ms > 0`) was applied after
2210    /// navigation.
2211    pub stabilized: bool,
2212}
2213
2214/// Options for [`PageHandle::refresh`].
2215///
2216/// # Example
2217///
2218/// ```
2219/// use stygian_browser::page::{RefreshOptions, WarmupWait};
2220///
2221/// let opts = RefreshOptions {
2222///     wait: WarmupWait::DomContentLoaded,
2223///     timeout_ms: 15_000,
2224///     reset_connection: true,
2225/// };
2226/// assert!(opts.reset_connection);
2227/// ```
2228#[derive(Debug, Clone, Serialize, Deserialize)]
2229pub struct RefreshOptions {
2230    /// Wait strategy applied after the reload (default: `DomContentLoaded`).
2231    #[serde(default)]
2232    pub wait: WarmupWait,
2233    /// Reload timeout in milliseconds.  Default: `30 000`.
2234    #[serde(default = "RefreshOptions::default_timeout_ms")]
2235    pub timeout_ms: u64,
2236    /// When `true`, re-navigates to the current URL rather than issuing a
2237    /// browser-level reload.  This signals to the calling code that a new TCP
2238    /// connection is desired while cookies and storage are retained in the
2239    /// browser process.  Default: `false`.
2240    #[serde(default)]
2241    pub reset_connection: bool,
2242}
2243
2244impl RefreshOptions {
2245    /// Returns the default reload timeout (30 000 ms).
2246    #[must_use]
2247    pub const fn default_timeout_ms() -> u64 {
2248        30_000
2249    }
2250}
2251
2252impl Default for RefreshOptions {
2253    fn default() -> Self {
2254        Self {
2255            wait: WarmupWait::DomContentLoaded,
2256            timeout_ms: Self::default_timeout_ms(),
2257            reset_connection: false,
2258        }
2259    }
2260}
2261
2262/// Diagnostic report produced by [`PageHandle::refresh`].
2263///
2264/// # Example
2265///
2266/// ```
2267/// use stygian_browser::page::RefreshReport;
2268/// let report = RefreshReport {
2269///     url: "https://example.com".to_string(),
2270///     elapsed_ms: 180,
2271///     status_code: Some(200),
2272/// };
2273/// assert_eq!(report.elapsed_ms, 180);
2274/// ```
2275#[derive(Debug, Clone, Serialize, Deserialize)]
2276pub struct RefreshReport {
2277    /// URL of the page after the refresh navigation.
2278    pub url: String,
2279    /// Elapsed wall-time in milliseconds.
2280    pub elapsed_ms: u64,
2281    /// HTTP status code of the refresh navigation, if captured.
2282    pub status_code: Option<u16>,
2283}
2284
2285// ─── PageHandle warmup / refresh ──────────────────────────────────────────────
2286
2287impl PageHandle {
2288    /// Warm up a browser session by navigating to `options.url` and
2289    /// optionally waiting for dynamic resources to settle.
2290    ///
2291    /// Warmup is **idempotent**: calling it repeatedly re-navigates and
2292    /// re-warms the same session without adverse side effects.
2293    ///
2294    /// # Errors
2295    ///
2296    /// Returns [`BrowserError::NavigationFailed`] if the navigation times out
2297    /// or the underlying CDP call fails.
2298    ///
2299    /// # Example
2300    ///
2301    /// ```no_run
2302    /// # async fn run() -> stygian_browser::error::Result<()> {
2303    /// use stygian_browser::{BrowserPool, BrowserConfig};
2304    /// use stygian_browser::page::{WarmupOptions, WarmupWait};
2305    ///
2306    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
2307    /// let handle = pool.acquire().await?;
2308    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
2309    ///
2310    /// let report = page.warmup(WarmupOptions {
2311    ///     url: "https://example.com".to_string(),
2312    ///     wait: WarmupWait::DomContentLoaded,
2313    ///     timeout_ms: 30_000,
2314    ///     stabilize_ms: 500,
2315    /// }).await?;
2316    /// println!("warmed in {}ms: {}", report.elapsed_ms, report.title);
2317    /// handle.release().await;
2318    /// # Ok(())
2319    /// # }
2320    /// ```
2321    pub async fn warmup(&mut self, options: WarmupOptions) -> Result<WarmupReport> {
2322        let start = std::time::Instant::now();
2323        let nav_timeout = Duration::from_millis(options.timeout_ms);
2324        self.navigate(
2325            &options.url,
2326            options.wait.clone().into_wait_until(),
2327            nav_timeout,
2328        )
2329        .await?;
2330        let status_code = self.status_code()?;
2331        let title = self.title().await.unwrap_or_default();
2332        let stabilized = options.stabilize_ms > 0;
2333        if stabilized {
2334            tokio::time::sleep(Duration::from_millis(options.stabilize_ms)).await;
2335        }
2336        let elapsed_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
2337        Ok(WarmupReport {
2338            url: options.url,
2339            elapsed_ms,
2340            status_code,
2341            title,
2342            stabilized,
2343        })
2344    }
2345
2346    /// Refresh the current page, retaining all in-browser session state
2347    /// (cookies, `localStorage`, `sessionStorage`).
2348    ///
2349    /// When `options.reset_connection` is `false` (default) a standard
2350    /// CDP reload is issued.  When `true`, the current URL is re-navigated,
2351    /// which expresses the caller's intent to force a new underlying TCP/TLS
2352    /// connection while keeping all browser-side state intact.
2353    ///
2354    /// Refresh is **idempotent**: repeated calls simply reload the page again.
2355    ///
2356    /// # Errors
2357    ///
2358    /// Returns [`BrowserError::NavigationFailed`] if the current URL cannot be
2359    /// determined or the reload times out.
2360    ///
2361    /// # Example
2362    ///
2363    /// ```no_run
2364    /// # async fn run() -> stygian_browser::error::Result<()> {
2365    /// use stygian_browser::{BrowserPool, BrowserConfig};
2366    /// use stygian_browser::page::{RefreshOptions, WaitUntil};
2367    ///
2368    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
2369    /// let handle = pool.acquire().await?;
2370    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
2371    /// page.navigate(
2372    ///     "https://example.com",
2373    ///     WaitUntil::DomContentLoaded,
2374    ///     std::time::Duration::from_secs(30),
2375    /// ).await?;
2376    ///
2377    /// let report = page.refresh(RefreshOptions::default()).await?;
2378    /// println!("refreshed in {}ms", report.elapsed_ms);
2379    /// handle.release().await;
2380    /// # Ok(())
2381    /// # }
2382    /// ```
2383    pub async fn refresh(&mut self, options: RefreshOptions) -> Result<RefreshReport> {
2384        let start = std::time::Instant::now();
2385        let nav_timeout = Duration::from_millis(options.timeout_ms);
2386        let wait = options.wait.clone().into_wait_until();
2387        // Resolve the current URL before any navigation changes it.
2388        let current_url = self.url().await?;
2389        if current_url.is_empty() || current_url == "about:blank" {
2390            return Err(BrowserError::NavigationFailed {
2391                url: current_url,
2392                reason: "page has not been navigated yet; call warmup() or navigate() first"
2393                    .to_string(),
2394            });
2395        }
2396        // Both code paths navigate to the same URL.  `reset_connection: true`
2397        // expresses the *intent* to use a new TCP connection; the browser is free
2398        // to reuse or create a new connection as its connection pool dictates.
2399        self.navigate(&current_url, wait, nav_timeout).await?;
2400        let status_code = self.status_code()?;
2401        let url = self.url().await?;
2402        let elapsed_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
2403        Ok(RefreshReport {
2404            url,
2405            elapsed_ms,
2406            status_code,
2407        })
2408    }
2409}
2410
2411// ─── Rust-side CDP Node → HTML serializer (Recursive fallback) ───────────────
2412
/// Numeric `nodeType` values reported on CDP `DOM.Node` objects.
///
/// The numbers are the node-type constants defined by the WHATWG DOM spec.
mod node_type {
    /// An `Element` node (`1`).
    pub const ELEMENT: i64 = 1;
    /// A `Text` node (`3`).
    pub const TEXT: i64 = 3;
    /// A `CDATASection` node (`4`).
    pub const CDATA_SECTION: i64 = 4;
    /// A `ProcessingInstruction` node (`7`).
    pub const PROCESSING_INSTRUCTION: i64 = 7;
    /// A `Comment` node (`8`).
    pub const COMMENT: i64 = 8;
    /// A `Document` node (`9`).
    pub const DOCUMENT: i64 = 9;
    /// A `DocumentType` node (`10`).
    pub const DOCUMENT_TYPE: i64 = 10;
    /// A `DocumentFragment` node (`11`).
    pub const DOCUMENT_FRAGMENT: i64 = 11;
}
2432
/// HTML elements that are serialised without an end tag and without children.
///
/// This is the WHATWG "void elements" set plus the legacy elements that the
/// HTML fragment serialisation algorithm also treats as void (`basefont`,
/// `bgsound`, `frame`, `keygen`, `param`). Names are lower-case because they
/// are compared against a node's `local_name`.
const VOID_ELEMENTS: &[&str] = &[
    "area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr", "img", "input",
    "keygen", "link", "meta", "param", "source", "track", "wbr",
];
2438
2439/// Serialise a CDP `Node` subtree (rooted at `node`) to an HTML string.
2440///
2441/// Used by [`NodeHandle::outer_html_via_rust_walk`] as the
2442/// [`OuterHtmlStrategy::Recursive`] fallback when `DOM.getOuterHTML`
2443/// returns an empty payload or errors out. The implementation is a
2444/// straightforward depth-first walk that mirrors what Chromium's own
2445/// `Element.outerHTML` accessor produces for the same tree:
2446/// - element nodes emit `<tag attrs>children</tag>`. [`VOID_ELEMENTS`]
2447///   emit `<tag attrs>` with no closing slash and no children, matching
2448///   Chromium's `outerHTML` byte-for-byte (which uses HTML5 syntax, not
2449///   XHTML self-closing).
2450/// - text nodes are HTML-escaped
2451/// - comment nodes emit `<!--value-->`
2452/// - `<!DOCTYPE …>` declarations are emitted for `DocumentType` roots
2453/// - `Document` / `DocumentFragment` roots emit only their children
2454///   (no outer wrapper), matching how `XMLSerializer` treats them
2455/// - `template` content (`template_content`) is inlined as additional
2456///   children of the `<template>` element, mirroring browser behaviour
2457/// - shadow roots are inlined as additional children of their host
2458///   (no `<shadowroot>` wrapper, since shadow content is what
2459///   `outerHTML` is expected to surface)
2460///
2461/// This serializer is not intended to be a perfect drop-in for
2462/// `Element.outerHTML` on every edge case (`CDATA`, `ProcessingInstruction`,
2463/// and namespace prefixes are simplified) — it is the second-line fallback
2464/// for the `Recursive` strategy and only fires when `DOM.getOuterHTML`
2465/// itself fails.
2466fn serialize_node_tree(node: &chromiumoxide::cdp::browser_protocol::dom::Node) -> String {
2467    let mut out = String::new();
2468    serialize_node_into(&mut out, node);
2469    out
2470}
2471
2472fn serialize_node_into(out: &mut String, node: &chromiumoxide::cdp::browser_protocol::dom::Node) {
2473    match node.node_type {
2474        node_type::ELEMENT => {
2475            let tag = node.local_name.as_str();
2476            out.push('<');
2477            out.push_str(tag);
2478            if let Some(attrs) = &node.attributes {
2479                for pair in attrs.chunks_exact(2) {
2480                    if let [name, value] = pair {
2481                        out.push(' ');
2482                        escape_attr_name(out, name);
2483                        out.push_str("=\"");
2484                        escape_attr_value(out, value);
2485                        out.push('"');
2486                    }
2487                }
2488            }
2489            if VOID_ELEMENTS.contains(&tag) {
2490                out.push('>');
2491                return;
2492            }
2493            out.push('>');
2494            serialize_inline_children(out, node);
2495            out.push_str("</");
2496            out.push_str(tag);
2497            out.push('>');
2498        }
2499        node_type::TEXT => {
2500            escape_text(out, &node.node_value);
2501        }
2502        node_type::COMMENT => {
2503            out.push_str("<!--");
2504            out.push_str(&node.node_value);
2505            out.push_str("-->");
2506        }
2507        node_type::DOCUMENT | node_type::DOCUMENT_FRAGMENT => {
2508            serialize_inline_children(out, node);
2509        }
2510        node_type::DOCUMENT_TYPE => {
2511            out.push_str("<!DOCTYPE ");
2512            out.push_str(&node.node_name);
2513            if let Some(public_id) = &node.public_id {
2514                out.push(' ');
2515                out.push_str(public_id);
2516            }
2517            if let Some(system_id) = &node.system_id {
2518                out.push(' ');
2519                out.push_str(system_id);
2520            }
2521            out.push('>');
2522        }
2523        node_type::CDATA_SECTION => {
2524            out.push_str("<![CDATA[");
2525            out.push_str(&node.node_value);
2526            out.push_str("]]>");
2527        }
2528        node_type::PROCESSING_INSTRUCTION => {
2529            out.push_str("<?");
2530            out.push_str(&node.node_name);
2531            if !node.node_value.is_empty() {
2532                out.push(' ');
2533                out.push_str(&node.node_value);
2534            }
2535            out.push_str("?>");
2536        }
2537        _ => {
2538            if !node.node_value.is_empty() {
2539                escape_text(out, &node.node_value);
2540            }
2541        }
2542    }
2543}
2544
2545/// Emit the inline children of a node (regular `children`, plus
2546/// `template_content`, `shadow_roots`, and `content_document`) in the order
2547/// Chromium's own `Element.outerHTML` accessor surfaces them.
2548fn serialize_inline_children(
2549    out: &mut String,
2550    node: &chromiumoxide::cdp::browser_protocol::dom::Node,
2551) {
2552    if let Some(children) = &node.children {
2553        for child in children {
2554            serialize_node_into(out, child);
2555        }
2556    }
2557    if let Some(template_content) = &node.template_content {
2558        serialize_node_into(out, template_content);
2559    }
2560    if let Some(shadow_roots) = &node.shadow_roots {
2561        for shadow in shadow_roots {
2562            serialize_node_into(out, shadow);
2563        }
2564    }
2565    if let Some(content_document) = &node.content_document {
2566        serialize_node_into(out, content_document);
2567    }
2568}
2569
/// Escape a text node payload for safe inclusion in HTML element content.
///
/// Appends `value` to `out`, replacing `&`, `<`, `>` and U+00A0 (no-break
/// space) with `&amp;`, `&lt;`, `&gt;` and `&nbsp;`. These are the
/// replacements HTML fragment serialisation applies to text outside
/// attribute values. Every other character is copied unchanged, and existing
/// content of `out` is left untouched.
fn escape_text(out: &mut String, value: &str) {
    for ch in value.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            // Without this a no-break space is indistinguishable from a
            // regular space to anyone reading the serialised markup.
            '\u{00A0}' => out.push_str("&nbsp;"),
            _ => out.push(ch),
        }
    }
}
2581
/// Escape an attribute name before it is written into a start tag.
///
/// `&`, `<` and `"` are replaced with `&amp;`, `&lt;` and `&quot;`; every
/// other character (including `>`) is appended to `out` unchanged. These
/// characters cannot occur in a well-formed attribute name, so this is a
/// purely defensive measure.
fn escape_attr_name(out: &mut String, value: &str) {
    for symbol in value.chars() {
        let entity = match symbol {
            '&' => Some("&amp;"),
            '<' => Some("&lt;"),
            '"' => Some("&quot;"),
            _ => None,
        };
        match entity {
            Some(replacement) => out.push_str(replacement),
            None => out.push(symbol),
        }
    }
}
2594
/// Escape an attribute value for inclusion inside `"…"` quoted form.
///
/// Appends `value` to `out`, replacing `&`, `<`, `>`, `"` and U+00A0
/// (no-break space) with `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&nbsp;`.
/// This is the attribute-mode escaping of the HTML fragment serialisation
/// algorithm. Every other character is copied unchanged.
fn escape_attr_value(out: &mut String, value: &str) {
    for ch in value.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            // Escaped together with `<` so a value can never look like a tag
            // boundary to a consumer that scans the markup textually.
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\u{00A0}' => out.push_str("&nbsp;"),
            _ => out.push(ch),
        }
    }
}
2606
2607// ─── Tests ────────────────────────────────────────────────────────────────────
2608
2609#[cfg(test)]
2610mod tests {
2611    use super::*;
2612
2613    #[test]
2614    fn resource_filter_block_media_blocks_image() {
2615        let filter = ResourceFilter::block_media();
2616        assert!(filter.should_block("Image"));
2617        assert!(filter.should_block("Font"));
2618        assert!(filter.should_block("Stylesheet"));
2619        assert!(filter.should_block("Media"));
2620        assert!(!filter.should_block("Script"));
2621        assert!(!filter.should_block("XHR"));
2622    }
2623
2624    #[test]
2625    fn resource_filter_case_insensitive() {
2626        let filter = ResourceFilter::block_images_and_fonts();
2627        assert!(filter.should_block("image")); // lowercase
2628        assert!(filter.should_block("IMAGE")); // uppercase
2629        assert!(!filter.should_block("Stylesheet"));
2630    }
2631
2632    #[test]
2633    fn resource_filter_builder_chain() {
2634        let filter = ResourceFilter::default()
2635            .block(ResourceType::Image)
2636            .block(ResourceType::Font);
2637        assert!(filter.should_block("Image"));
2638        assert!(filter.should_block("Font"));
2639        assert!(!filter.should_block("Stylesheet"));
2640    }
2641
2642    #[test]
2643    fn resource_filter_dedup_block() {
2644        let filter = ResourceFilter::default()
2645            .block(ResourceType::Image)
2646            .block(ResourceType::Image); // duplicate
2647        assert_eq!(filter.blocked.len(), 1);
2648    }
2649
2650    #[test]
2651    fn resource_filter_is_empty_when_default() {
2652        assert!(ResourceFilter::default().is_empty());
2653        assert!(!ResourceFilter::block_media().is_empty());
2654    }
2655
2656    #[test]
2657    fn wait_until_selector_stores_string() {
2658        let w = WaitUntil::Selector("#foo".to_string());
2659        assert!(matches!(w, WaitUntil::Selector(ref s) if s == "#foo"));
2660    }
2661
2662    #[test]
2663    fn resource_type_cdp_str() {
2664        assert_eq!(ResourceType::Image.as_cdp_str(), "Image");
2665        assert_eq!(ResourceType::Font.as_cdp_str(), "Font");
2666        assert_eq!(ResourceType::Stylesheet.as_cdp_str(), "Stylesheet");
2667        assert_eq!(ResourceType::Media.as_cdp_str(), "Media");
2668    }
2669
2670    #[test]
2671    fn page_handle_is_send_sync() {
2672        fn assert_send<T: Send>() {}
2673        fn assert_sync<T: Sync>() {}
2674        assert_send::<PageHandle>();
2675        assert_sync::<PageHandle>();
2676    }
2677
2678    /// Verify the resilient extractor correctly classifies `ExtractionError`
2679    /// variants — `Missing` must be treated as "skip", others as hard errors.
2680    #[cfg(feature = "extract")]
2681    #[test]
2682    fn extraction_error_missing_is_skippable() {
2683        use crate::extract::ExtractionError;
2684
2685        let missing = ExtractionError::Missing {
2686            field: "title",
2687            selector: "h1",
2688        };
2689        assert!(
2690            matches!(missing, ExtractionError::Missing { .. }),
2691            "ExtractionError::Missing should be the skip variant"
2692        );
2693
2694        // Non-Missing variants should NOT match the skip pattern
2695        let nested = ExtractionError::Nested {
2696            field: "link",
2697            source: Box::new(ExtractionError::Missing {
2698                field: "href",
2699                selector: "a",
2700            }),
2701        };
2702        assert!(
2703            !matches!(nested, ExtractionError::Missing { .. }),
2704            "ExtractionError::Nested must not match Missing"
2705        );
2706    }
2707
2708    /// `Option<u16>` are pure-logic invariants testable without a live browser.
2709    #[test]
2710    fn status_code_sentinel_zero_maps_to_none() {
2711        use std::sync::atomic::{AtomicU16, Ordering};
2712        let atom = AtomicU16::new(0);
2713        let code = atom.load(Ordering::Acquire);
2714        assert_eq!(if code == 0 { None } else { Some(code) }, None::<u16>);
2715    }
2716
2717    #[test]
2718    fn status_code_non_zero_maps_to_some() {
2719        use std::sync::atomic::{AtomicU16, Ordering};
2720        for &expected in &[200u16, 301, 404, 503] {
2721            let atom = AtomicU16::new(expected);
2722            let code = atom.load(Ordering::Acquire);
2723            assert_eq!(if code == 0 { None } else { Some(code) }, Some(expected));
2724        }
2725    }
2726
2727    // ── NodeHandle pure-logic tests ───────────────────────────────────────────
2728
2729    /// `attr_map` relies on `chunks_exact(2)` — verify the pairing logic is
2730    /// correct without a live browser by exercising it directly.
2731    #[test]
2732    fn attr_map_chunking_pairs_correctly() {
2733        let flat = [
2734            "id".to_string(),
2735            "main".to_string(),
2736            "data-ux".to_string(),
2737            "Section".to_string(),
2738            "class".to_string(),
2739            "container".to_string(),
2740        ];
2741        let mut map = std::collections::HashMap::with_capacity(flat.len() / 2);
2742        for pair in flat.chunks_exact(2) {
2743            if let [name, value] = pair {
2744                map.insert(name.clone(), value.clone());
2745            }
2746        }
2747        assert_eq!(map.get("id").map(String::as_str), Some("main"));
2748        assert_eq!(map.get("data-ux").map(String::as_str), Some("Section"));
2749        assert_eq!(map.get("class").map(String::as_str), Some("container"));
2750        assert_eq!(map.len(), 3);
2751    }
2752
2753    /// gracefully — the trailing element is silently ignored.
2754    #[test]
2755    fn attr_map_chunking_ignores_odd_trailing() {
2756        let flat = ["orphan".to_string()]; // no value
2757        let mut map = std::collections::HashMap::new();
2758        for pair in flat.chunks_exact(2) {
2759            if let [name, value] = pair {
2760                map.insert(name.clone(), value.clone());
2761            }
2762        }
2763        assert!(map.is_empty());
2764    }
2765
2766    /// Empty flat list → empty map.
2767    #[test]
2768    fn attr_map_chunking_empty_input() {
2769        let flat: Vec<String> = vec![];
2770        let map: std::collections::HashMap<String, String> = flat
2771            .chunks_exact(2)
2772            .filter_map(|pair| {
2773                if let [name, value] = pair {
2774                    Some((name.clone(), value.clone()))
2775                } else {
2776                    None
2777                }
2778            })
2779            .collect();
2780        assert!(map.is_empty());
2781    }
2782
2783    #[test]
2784    fn ancestors_json_parse_round_trip() -> std::result::Result<(), serde_json::Error> {
2785        let json = r#"["p","article","body","html"]"#;
2786        let result: Vec<String> = serde_json::from_str(json)?;
2787        assert_eq!(result, ["p", "article", "body", "html"]);
2788        Ok(())
2789    }
2790
2791    #[test]
2792    fn ancestors_json_parse_empty() -> std::result::Result<(), serde_json::Error> {
2793        let json = "[]";
2794        let result: Vec<String> = serde_json::from_str(json)?;
2795        assert!(result.is_empty());
2796        Ok(())
2797    }
2798
2799    /// `"div::parent"`) must surface that suffix in its `Display` output so
2800    /// callers can locate the failed traversal in logs.
2801    #[test]
2802    fn traversal_selector_suffix_in_stale_error() {
2803        let e = crate::error::BrowserError::StaleNode {
2804            selector: "div::parent".to_string(),
2805        };
2806        let msg = e.to_string();
2807        assert!(
2808            msg.contains("div::parent"),
2809            "StaleNode display must include the full selector; got: {msg}"
2810        );
2811    }
2812
2813    #[test]
2814    fn traversal_next_suffix_in_stale_error() {
2815        let e = crate::error::BrowserError::StaleNode {
2816            selector: "li.price::next".to_string(),
2817        };
2818        assert!(e.to_string().contains("li.price::next"));
2819    }
2820
2821    #[test]
2822    fn traversal_prev_suffix_in_stale_error() {
2823        let e = crate::error::BrowserError::StaleNode {
2824            selector: "td.label::prev".to_string(),
2825        };
2826        assert!(e.to_string().contains("td.label::prev"));
2827    }
2828
2829    // ── OuterHtmlStrategy / OuterHtmlResult type tests (T101) ─────────────────
2830
2831    #[test]
2832    fn outer_html_strategy_default_is_current() {
2833        assert_eq!(OuterHtmlStrategy::default(), OuterHtmlStrategy::Current);
2834    }
2835
2836    #[test]
2837    fn outer_html_strategy_as_str_matches_variant() {
2838        assert_eq!(OuterHtmlStrategy::Current.as_str(), "Current");
2839        assert_eq!(OuterHtmlStrategy::Recursive.as_str(), "Recursive");
2840    }
2841
2842    #[test]
2843    fn outer_html_strategy_display_matches_as_str() {
2844        assert_eq!(
2845            format!("{}", OuterHtmlStrategy::Current),
2846            OuterHtmlStrategy::Current.as_str()
2847        );
2848        assert_eq!(
2849            format!("{}", OuterHtmlStrategy::Recursive),
2850            OuterHtmlStrategy::Recursive.as_str()
2851        );
2852    }
2853
2854    #[test]
2855    fn outer_html_strategy_is_copy_and_eq() {
2856        let s = OuterHtmlStrategy::Recursive;
2857        let copy = s;
2858        assert_eq!(s, copy);
2859        assert_eq!(s, OuterHtmlStrategy::Recursive);
2860        assert_ne!(s, OuterHtmlStrategy::Current);
2861    }
2862
2863    #[test]
2864    fn outer_html_strategy_all_iterates_both_variants() {
2865        let all = OuterHtmlStrategy::all();
2866        assert_eq!(all.len(), 2);
2867        assert_eq!(all[0], OuterHtmlStrategy::Current);
2868        assert_eq!(all[1], OuterHtmlStrategy::Recursive);
2869    }
2870
2871    #[test]
2872    fn outer_html_strategy_serialize_round_trip()
2873    -> std::result::Result<(), Box<dyn std::error::Error>> {
2874        for variant in OuterHtmlStrategy::all() {
2875            let json = serde_json::to_string(&variant)?;
2876            let restored: OuterHtmlStrategy = serde_json::from_str(&json)?;
2877            assert_eq!(restored, variant);
2878        }
2879        Ok(())
2880    }
2881
2882    #[test]
2883    fn outer_html_result_content_returns_some_for_content() {
2884        let r = OuterHtmlResult::Content("<div/>".to_string());
2885        assert_eq!(r.content(), Some("<div/>"));
2886    }
2887
2888    #[test]
2889    fn outer_html_result_content_returns_none_for_empty() {
2890        assert_eq!(OuterHtmlResult::Empty.content(), None);
2891    }
2892
2893    #[test]
2894    fn outer_html_result_content_returns_none_for_failed() {
2895        let r = OuterHtmlResult::Failed {
2896            backends: vec!["DOM.getOuterHTML"],
2897        };
2898        assert_eq!(r.content(), None);
2899    }
2900
2901    #[test]
2902    fn outer_html_result_is_empty_variants() {
2903        assert!(OuterHtmlResult::Empty.is_empty());
2904        assert!(
2905            OuterHtmlResult::Failed {
2906                backends: vec!["a"]
2907            }
2908            .is_empty()
2909        );
2910        assert!(!OuterHtmlResult::Content("<x/>".to_string()).is_empty());
2911        assert!(OuterHtmlResult::Content(String::new()).is_empty());
2912    }
2913
2914    #[test]
2915    fn outer_html_result_display_includes_state() {
2916        assert_eq!(format!("{}", OuterHtmlResult::Empty), "Empty");
2917        assert_eq!(
2918            format!("{}", OuterHtmlResult::Content("<div/>".to_string())),
2919            "Content(6 bytes)"
2920        );
2921        let failed = OuterHtmlResult::Failed {
2922            backends: vec!["DOM.getOuterHTML", "DOM.describeNode-walk"],
2923        };
2924        let s = format!("{failed}");
2925        assert!(s.contains("DOM.getOuterHTML"));
2926        assert!(s.contains("DOM.describeNode-walk"));
2927    }
2928
2929    #[test]
2930    fn outer_html_result_serializes_each_variant()
2931    -> std::result::Result<(), Box<dyn std::error::Error>> {
2932        let empty_json = serde_json::to_string(&OuterHtmlResult::Empty)?;
2933        assert_eq!(empty_json, "\"Empty\"");
2934
2935        let content_json =
2936            serde_json::to_string(&OuterHtmlResult::Content("<p>x</p>".to_string()))?;
2937        assert_eq!(content_json, r#"{"Content":"<p>x</p>"}"#);
2938
2939        let failed_json = serde_json::to_string(&OuterHtmlResult::Failed {
2940            backends: vec!["DOM.getOuterHTML", "DOM.describeNode-walk"],
2941        })?;
2942        assert_eq!(
2943            failed_json,
2944            r#"{"Failed":{"backends":["DOM.getOuterHTML","DOM.describeNode-walk"]}}"#
2945        );
2946        Ok(())
2947    }
2948
2949    // ── Rust-side CDP Node → HTML serializer tests (T101) ─────────────────────
2950
2951    use chromiumoxide::cdp::browser_protocol::dom::{BackendNodeId, Node, NodeId};
2952
    /// Build a CDP [`Node`] for the serializer tests.
    ///
    /// Only the six fields the tests vary are parameters. The two ids take
    /// their `Default` value and every remaining optional field is `None`;
    /// tests that need one of those (e.g. `public_id`, `shadow_roots`,
    /// `template_content`) override it on the returned value.
    fn mk_node(
        node_type: i64,
        local_name: &str,
        node_name: &str,
        node_value: &str,
        attributes: Option<Vec<String>>,
        children: Option<Vec<Node>>,
    ) -> Node {
        // Every field is spelled out (no `..Default::default()`), so a field
        // added to the upstream struct shows up here as a compile error.
        Node {
            node_id: NodeId::default(),
            parent_id: None,
            backend_node_id: BackendNodeId::default(),
            node_type,
            node_name: node_name.to_string(),
            local_name: local_name.to_string(),
            node_value: node_value.to_string(),
            child_node_count: None,
            children,
            attributes,
            document_url: None,
            base_url: None,
            public_id: None,
            system_id: None,
            internal_subset: None,
            xml_version: None,
            name: None,
            value: None,
            pseudo_type: None,
            pseudo_identifier: None,
            shadow_root_type: None,
            frame_id: None,
            content_document: None,
            shadow_roots: None,
            template_content: None,
            pseudo_elements: None,
            distributed_nodes: None,
            is_svg: None,
            compatibility_mode: None,
            assigned_slot: None,
            is_scrollable: None,
            affected_by_starting_styles: None,
            adopted_style_sheets: None,
        }
    }
2997
2998    #[test]
2999    fn serialize_element_with_text_child() {
3000        let text = mk_node(node_type::TEXT, "", "", "hello", None, None);
3001        let div = mk_node(node_type::ELEMENT, "div", "DIV", "", None, Some(vec![text]));
3002        assert_eq!(serialize_node_tree(&div), "<div>hello</div>");
3003    }
3004
3005    #[test]
3006    fn serialize_element_with_attributes() {
3007        let div = mk_node(
3008            node_type::ELEMENT,
3009            "div",
3010            "DIV",
3011            "",
3012            Some(vec![
3013                "id".into(),
3014                "main".into(),
3015                "class".into(),
3016                "container wide".into(),
3017            ]),
3018            None,
3019        );
3020        assert_eq!(
3021            serialize_node_tree(&div),
3022            r#"<div id="main" class="container wide"></div>"#
3023        );
3024    }
3025
3026    #[test]
3027    fn serialize_void_element_emits_self_closing() {
3028        let img = mk_node(
3029            node_type::ELEMENT,
3030            "img",
3031            "IMG",
3032            "",
3033            Some(vec!["src".into(), "/a.png".into()]),
3034            None,
3035        );
3036        assert_eq!(serialize_node_tree(&img), r#"<img src="/a.png">"#);
3037        let br = mk_node(node_type::ELEMENT, "br", "BR", "", None, None);
3038        assert_eq!(serialize_node_tree(&br), "<br>");
3039    }
3040
3041    #[test]
3042    fn serialize_nested_elements() {
3043        let p = mk_node(
3044            node_type::ELEMENT,
3045            "p",
3046            "P",
3047            "",
3048            None,
3049            Some(vec![mk_node(
3050                node_type::TEXT,
3051                "",
3052                "",
3053                "Mesh content here",
3054                None,
3055                None,
3056            )]),
3057        );
3058        let section = mk_node(
3059            node_type::ELEMENT,
3060            "section",
3061            "SECTION",
3062            "",
3063            None,
3064            Some(vec![p]),
3065        );
3066        let html = serialize_node_tree(&section);
3067        assert_eq!(html, "<section><p>Mesh content here</p></section>");
3068    }
3069
3070    #[test]
3071    fn serialize_text_escapes_special_chars() {
3072        let n = mk_node(node_type::TEXT, "", "", "a < b && c > d", None, None);
3073        assert_eq!(serialize_node_tree(&n), "a &lt; b &amp;&amp; c &gt; d");
3074    }
3075
3076    #[test]
3077    fn serialize_attribute_value_escapes_quotes_and_amp() {
3078        let div = mk_node(
3079            node_type::ELEMENT,
3080            "div",
3081            "DIV",
3082            "",
3083            Some(vec!["title".into(), "a & b \"c\"".into()]),
3084            None,
3085        );
3086        assert_eq!(
3087            serialize_node_tree(&div),
3088            r#"<div title="a &amp; b &quot;c&quot;"></div>"#
3089        );
3090    }
3091
3092    #[test]
3093    fn serialize_attribute_name_escapes_special_chars() {
3094        let div = mk_node(
3095            node_type::ELEMENT,
3096            "div",
3097            "DIV",
3098            "",
3099            Some(vec!["weird<\"&".into(), "v".into()]),
3100            None,
3101        );
3102        assert_eq!(
3103            serialize_node_tree(&div),
3104            r#"<div weird&lt;&quot;&amp;="v"></div>"#
3105        );
3106    }
3107
3108    #[test]
3109    fn serialize_comment_node() {
3110        let n = mk_node(node_type::COMMENT, "", "", " a comment ", None, None);
3111        assert_eq!(serialize_node_tree(&n), "<!-- a comment -->");
3112    }
3113
3114    #[test]
3115    fn serialize_document_root_flattens_children() {
3116        let html = mk_node(
3117            node_type::ELEMENT,
3118            "html",
3119            "HTML",
3120            "",
3121            None,
3122            Some(vec![mk_node(
3123                node_type::ELEMENT,
3124                "body",
3125                "BODY",
3126                "",
3127                None,
3128                None,
3129            )]),
3130        );
3131        let doc = mk_node(
3132            node_type::DOCUMENT,
3133            "",
3134            "#document",
3135            "",
3136            None,
3137            Some(vec![html]),
3138        );
3139        assert_eq!(serialize_node_tree(&doc), "<html><body></body></html>");
3140    }
3141
3142    #[test]
3143    fn serialize_document_fragment_root_flattens_children() {
3144        let span = mk_node(
3145            node_type::ELEMENT,
3146            "span",
3147            "SPAN",
3148            "",
3149            None,
3150            Some(vec![mk_node(node_type::TEXT, "", "", "x", None, None)]),
3151        );
3152        let frag = mk_node(
3153            node_type::DOCUMENT_FRAGMENT,
3154            "",
3155            "#document-fragment",
3156            "",
3157            None,
3158            Some(vec![span]),
3159        );
3160        assert_eq!(serialize_node_tree(&frag), "<span>x</span>");
3161    }
3162
3163    #[test]
3164    fn serialize_doctype_node() {
3165        let dt = Node {
3166            public_id: Some("-//W3C//DTD HTML 4.01//EN".to_string()),
3167            system_id: Some("http://www.w3.org/TR/html4/strict.dtd".to_string()),
3168            ..mk_node(node_type::DOCUMENT_TYPE, "", "html", "", None, None)
3169        };
3170        assert_eq!(
3171            serialize_node_tree(&dt),
3172            "<!DOCTYPE html -//W3C//DTD HTML 4.01//EN http://www.w3.org/TR/html4/strict.dtd>"
3173        );
3174    }
3175
3176    #[test]
3177    fn serialize_doctype_node_no_ids() {
3178        let dt = mk_node(node_type::DOCUMENT_TYPE, "", "html", "", None, None);
3179        assert_eq!(serialize_node_tree(&dt), "<!DOCTYPE html>");
3180    }
3181
3182    #[test]
3183    fn serialize_cdata_section() {
3184        let n = mk_node(node_type::CDATA_SECTION, "", "", "raw & <data>", None, None);
3185        assert_eq!(serialize_node_tree(&n), "<![CDATA[raw & <data>]]>");
3186    }
3187
3188    #[test]
3189    fn serialize_processing_instruction() {
3190        let n = mk_node(
3191            node_type::PROCESSING_INSTRUCTION,
3192            "",
3193            "xml-stylesheet",
3194            "href=\"style.css\"",
3195            None,
3196            None,
3197        );
3198        assert_eq!(
3199            serialize_node_tree(&n),
3200            "<?xml-stylesheet href=\"style.css\"?>"
3201        );
3202    }
3203
3204    #[test]
3205    fn serialize_template_inlines_template_content() {
3206        let inner = mk_node(
3207            node_type::ELEMENT,
3208            "span",
3209            "SPAN",
3210            "",
3211            None,
3212            Some(vec![mk_node(node_type::TEXT, "", "", "tmpl", None, None)]),
3213        );
3214        let mut tmpl = mk_node(node_type::ELEMENT, "template", "TEMPLATE", "", None, None);
3215        tmpl.template_content = Some(Box::new(inner));
3216        assert_eq!(
3217            serialize_node_tree(&tmpl),
3218            "<template><span>tmpl</span></template>"
3219        );
3220    }
3221
3222    #[test]
3223    fn serialize_shadow_roots_inlined_into_host() {
3224        let shadow_text = mk_node(node_type::TEXT, "", "", "shadow-text", None, None);
3225        let shadow = Node {
3226            shadow_root_type: Some(chromiumoxide::cdp::browser_protocol::dom::ShadowRootType::Open),
3227            ..mk_node(
3228                node_type::DOCUMENT_FRAGMENT,
3229                "",
3230                "#document-fragment",
3231                "",
3232                None,
3233                Some(vec![mk_node(
3234                    node_type::ELEMENT,
3235                    "span",
3236                    "SPAN",
3237                    "",
3238                    None,
3239                    Some(vec![shadow_text]),
3240                )]),
3241            )
3242        };
3243        let mut host = mk_node(
3244            node_type::ELEMENT,
3245            "div",
3246            "DIV",
3247            "",
3248            None,
3249            Some(vec![mk_node(node_type::TEXT, "", "", "light", None, None)]),
3250        );
3251        host.shadow_roots = Some(vec![shadow]);
3252        assert_eq!(
3253            serialize_node_tree(&host),
3254            "<div>light<span>shadow-text</span></div>"
3255        );
3256    }
3257
3258    #[test]
3259    fn serialize_deeply_nested_subtree() {
3260        // Build a 5-level deep subtree: <a><b><c><d><e>deep</e></d></c></b></a>
3261        let tag_e = mk_node(
3262            node_type::ELEMENT,
3263            "e",
3264            "E",
3265            "",
3266            None,
3267            Some(vec![mk_node(node_type::TEXT, "", "", "deep", None, None)]),
3268        );
3269        let tag_d = mk_node(node_type::ELEMENT, "d", "D", "", None, Some(vec![tag_e]));
3270        let tag_c = mk_node(node_type::ELEMENT, "c", "C", "", None, Some(vec![tag_d]));
3271        let tag_b = mk_node(node_type::ELEMENT, "b", "B", "", None, Some(vec![tag_c]));
3272        let tag_a = mk_node(node_type::ELEMENT, "a", "A", "", None, Some(vec![tag_b]));
3273        assert_eq!(
3274            serialize_node_tree(&tag_a),
3275            "<a><b><c><d><e>deep</e></d></c></b></a>"
3276        );
3277    }
3278
3279    #[test]
3280    fn serialize_element_with_text_and_element_children() {
3281        let span = mk_node(
3282            node_type::ELEMENT,
3283            "span",
3284            "SPAN",
3285            "",
3286            None,
3287            Some(vec![mk_node(node_type::TEXT, "", "", "inline", None, None)]),
3288        );
3289        let div = mk_node(
3290            node_type::ELEMENT,
3291            "div",
3292            "DIV",
3293            "",
3294            None,
3295            Some(vec![
3296                mk_node(node_type::TEXT, "", "", "before", None, None),
3297                span,
3298                mk_node(node_type::TEXT, "", "", "after", None, None),
3299            ]),
3300        );
3301        assert_eq!(
3302            serialize_node_tree(&div),
3303            "<div>before<span>inline</span>after</div>"
3304        );
3305    }
3306
3307    #[test]
3308    fn serialize_attribute_pairs_drop_orphans() {
3309        // An odd-length attribute list (one name with no value) must not crash.
3310        let div = mk_node(
3311            node_type::ELEMENT,
3312            "div",
3313            "DIV",
3314            "",
3315            Some(vec!["orphan".into()]),
3316            None,
3317        );
3318        // The orphan name has no value so it is silently skipped (pairs of 2).
3319        assert_eq!(serialize_node_tree(&div), "<div></div>");
3320    }
3321
3322    // ── Warmup / Refresh type tests ───────────────────────────────────────────
3323
3324    #[test]
3325    fn warmup_options_defaults() {
3326        let opts = WarmupOptions::default();
3327        assert_eq!(opts.wait, WarmupWait::DomContentLoaded);
3328        assert_eq!(opts.timeout_ms, WarmupOptions::default_timeout_ms());
3329        assert_eq!(opts.stabilize_ms, 0);
3330    }
3331
3332    #[test]
3333    fn warmup_options_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>>
3334    {
3335        let opts = WarmupOptions {
3336            url: "https://example.com".to_string(),
3337            wait: WarmupWait::NetworkIdle,
3338            timeout_ms: 15_000,
3339            stabilize_ms: 250,
3340        };
3341        let json = serde_json::to_string(&opts)?;
3342        let restored: WarmupOptions = serde_json::from_str(&json)?;
3343        assert_eq!(restored.url, "https://example.com");
3344        assert_eq!(restored.wait, WarmupWait::NetworkIdle);
3345        assert_eq!(restored.timeout_ms, 15_000);
3346        assert_eq!(restored.stabilize_ms, 250);
3347        Ok(())
3348    }
3349
3350    #[test]
3351    fn warmup_wait_default_is_dom_content_loaded() {
3352        assert_eq!(WarmupWait::default(), WarmupWait::DomContentLoaded);
3353    }
3354
3355    #[test]
3356    fn warmup_wait_into_wait_until_variants() {
3357        assert!(matches!(
3358            WarmupWait::DomContentLoaded.into_wait_until(),
3359            WaitUntil::DomContentLoaded
3360        ));
3361        assert!(matches!(
3362            WarmupWait::NetworkIdle.into_wait_until(),
3363            WaitUntil::NetworkIdle
3364        ));
3365    }
3366
3367    #[test]
3368    fn refresh_options_defaults() {
3369        let opts = RefreshOptions::default();
3370        assert_eq!(opts.wait, WarmupWait::DomContentLoaded);
3371        assert_eq!(opts.timeout_ms, RefreshOptions::default_timeout_ms());
3372        assert!(!opts.reset_connection);
3373    }
3374
3375    #[test]
3376    fn refresh_options_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>>
3377    {
3378        let opts = RefreshOptions {
3379            wait: WarmupWait::NetworkIdle,
3380            timeout_ms: 10_000,
3381            reset_connection: true,
3382        };
3383        let json = serde_json::to_string(&opts)?;
3384        let restored: RefreshOptions = serde_json::from_str(&json)?;
3385        assert_eq!(restored.wait, WarmupWait::NetworkIdle);
3386        assert_eq!(restored.timeout_ms, 10_000);
3387        assert!(restored.reset_connection);
3388        Ok(())
3389    }
3390
3391    #[test]
3392    fn warmup_report_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>> {
3393        let report = WarmupReport {
3394            url: "https://example.com".to_string(),
3395            elapsed_ms: 320,
3396            status_code: Some(200),
3397            title: "Example Domain".to_string(),
3398            stabilized: true,
3399        };
3400        let json = serde_json::to_string(&report)?;
3401        let restored: WarmupReport = serde_json::from_str(&json)?;
3402        assert_eq!(restored.url, "https://example.com");
3403        assert_eq!(restored.elapsed_ms, 320);
3404        assert_eq!(restored.status_code, Some(200));
3405        assert_eq!(restored.title, "Example Domain");
3406        assert!(restored.stabilized);
3407        Ok(())
3408    }
3409
3410    #[test]
3411    fn refresh_report_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>>
3412    {
3413        let report = RefreshReport {
3414            url: "https://example.com/".to_string(),
3415            elapsed_ms: 180,
3416            status_code: Some(304),
3417        };
3418        let json = serde_json::to_string(&report)?;
3419        let restored: RefreshReport = serde_json::from_str(&json)?;
3420        assert_eq!(restored.url, "https://example.com/");
3421        assert_eq!(restored.elapsed_ms, 180);
3422        assert_eq!(restored.status_code, Some(304));
3423        Ok(())
3424    }
3425
3426    #[test]
3427    fn warmup_options_missing_stabilize_ms_defaults_to_zero()
3428    -> std::result::Result<(), Box<dyn std::error::Error>> {
3429        // stabilize_ms has `#[serde(default)]`; omitting it from JSON should
3430        // deserialize to 0 rather than erroring.
3431        let json = r#"{"url":"https://example.com","timeout_ms":30000}"#;
3432        let opts: WarmupOptions = serde_json::from_str(json)?;
3433        assert_eq!(opts.stabilize_ms, 0);
3434        Ok(())
3435    }
3436
3437    // ── Integration tests (require live Chrome — skipped in CI) ──────────────
3438
3439    /// Warm up a page then immediately extract content from the same origin.
3440    #[test]
3441    #[ignore = "requires live Chrome"]
3442    #[allow(clippy::expect_used)]
3443    fn integration_warmup_then_extraction() {
3444        let rt = tokio::runtime::Runtime::new().expect("tokio runtime");
3445        rt.block_on(async {
3446            use crate::{BrowserConfig, BrowserPool};
3447            let pool = BrowserPool::new(BrowserConfig::default())
3448                .await
3449                .expect("pool");
3450            let handle = pool.acquire().await.expect("handle");
3451            let mut page = handle
3452                .browser()
3453                .expect("browser")
3454                .new_page()
3455                .await
3456                .expect("page");
3457
3458            let report = page
3459                .warmup(WarmupOptions {
3460                    url: "https://example.com".to_string(),
3461                    wait: WarmupWait::DomContentLoaded,
3462                    timeout_ms: 30_000,
3463                    stabilize_ms: 0,
3464                })
3465                .await
3466                .expect("warmup");
3467
3468            assert!(!report.title.is_empty(), "title populated after warmup");
3469            assert!(report.elapsed_ms > 0);
3470
3471            // Confirm the page is still usable for further queries.
3472            let html = page.content().await.expect("content");
3473            assert!(
3474                html.contains("example"),
3475                "page content available after warmup"
3476            );
3477
3478            page.close().await.expect("close");
3479            handle.release().await;
3480        });
3481    }
3482
3483    /// Refresh a page and verify session continuity (URL unchanged, page
3484    /// still navigable).
3485    #[test]
3486    #[ignore = "requires live Chrome"]
3487    #[allow(clippy::expect_used)]
3488    fn integration_refresh_keeps_session_state() {
3489        let rt = tokio::runtime::Runtime::new().expect("tokio runtime");
3490        rt.block_on(async {
3491            use crate::{BrowserConfig, BrowserPool};
3492            let pool = BrowserPool::new(BrowserConfig::default())
3493                .await
3494                .expect("pool");
3495            let handle = pool.acquire().await.expect("handle");
3496            let mut page = handle
3497                .browser()
3498                .expect("browser")
3499                .new_page()
3500                .await
3501                .expect("page");
3502
3503            page.navigate(
3504                "https://example.com",
3505                WaitUntil::DomContentLoaded,
3506                Duration::from_secs(30),
3507            )
3508            .await
3509            .expect("initial navigate");
3510
3511            let report = page
3512                .refresh(RefreshOptions::default())
3513                .await
3514                .expect("refresh");
3515
3516            assert!(
3517                report.url.contains("example.com"),
3518                "URL retained after refresh; got: {}",
3519                report.url
3520            );
3521            assert!(report.elapsed_ms > 0);
3522
3523            page.close().await.expect("close");
3524            handle.release().await;
3525        });
3526    }
3527}