stygian_browser/
page.rs

1//! Page and browsing context management for isolated, parallel scraping
2//!
3//! Each `BrowserContext` (future) is an incognito-style isolation boundary (separate
4//! cookies, localStorage, cache).  Each context can contain many [`PageHandle`]s
5//! (tabs).  Both types clean up their CDP resources automatically on drop.
6//!
7//! ## Resource blocking
8//!
9//! Pass a [`ResourceFilter`] to [`PageHandle::set_resource_filter`] to intercept
10//! and block specific request types (images, fonts, CSS) before page load —
11//! significantly reducing page load times for text-only scraping.
12//!
13//! ## Wait strategies
14//!
15//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
16//! - `DomContentLoaded` — fires when the HTML is parsed
17//! - `NetworkIdle` — fires when there are ≤2 in-flight requests for 500 ms
18//! - `Selector(css)` — fires when a CSS selector matches an element
19//!
20//! # Example
21//!
22//! ```no_run
23//! use stygian_browser::{BrowserPool, BrowserConfig};
24//! use stygian_browser::page::{ResourceFilter, WaitUntil};
25//! use std::time::Duration;
26//!
27//! # async fn run() -> stygian_browser::error::Result<()> {
28//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
29//! let handle = pool.acquire().await?;
30//!
31//! let mut page = handle.browser().expect("valid browser").new_page().await?;
32//! page.set_resource_filter(ResourceFilter::block_media()).await?;
33//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
34//! let title = page.title().await?;
35//! println!("title: {title}");
36//! handle.release().await;
37//! # Ok(())
38//! # }
39//! ```
40
41use std::sync::{
42    Arc,
43    atomic::{AtomicU16, Ordering},
44};
45use std::time::Duration;
46
47use chromiumoxide::Page;
48use tokio::time::timeout;
49use tracing::{debug, warn};
50
51use crate::error::{BrowserError, Result};
52
53// ─── ResourceType ─────────────────────────────────────────────────────────────
54
/// Resource categories that a [`ResourceFilter`] can block.
///
/// Each variant maps to one CDP resource-type name; see
/// [`ResourceType::as_cdp_str`] for the exact string.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ResourceType {
    /// `<img>`, `<picture>`, background images
    Image,
    /// Web fonts loaded via CSS `@font-face`
    Font,
    /// External CSS stylesheets
    Stylesheet,
    /// Media files (audio/video)
    Media,
}

impl ResourceType {
    /// Returns the CDP resource-type name for this variant.
    ///
    /// This is the string that [`ResourceFilter::should_block`] compares
    /// (ASCII case-insensitively) against the resource type reported on
    /// `Fetch.requestPaused` events.
    pub const fn as_cdp_str(&self) -> &'static str {
        match self {
            Self::Image => "Image",
            Self::Font => "Font",
            Self::Stylesheet => "Stylesheet",
            Self::Media => "Media",
        }
    }
}
79
80// ─── ResourceFilter ───────────────────────────────────────────────────────────
81
82/// Set of resource types to block from loading.
83///
84/// # Example
85///
86/// ```
87/// use stygian_browser::page::ResourceFilter;
88/// let filter = ResourceFilter::block_media();
89/// assert!(filter.should_block("Image"));
90/// ```
91#[derive(Debug, Clone, Default)]
92pub struct ResourceFilter {
93    blocked: Vec<ResourceType>,
94}
95
96impl ResourceFilter {
97    /// Block all media resources (images, fonts, CSS, audio/video).
98    pub fn block_media() -> Self {
99        Self {
100            blocked: vec![
101                ResourceType::Image,
102                ResourceType::Font,
103                ResourceType::Stylesheet,
104                ResourceType::Media,
105            ],
106        }
107    }
108
109    /// Block only images and fonts (keep styles for layout-sensitive work).
110    pub fn block_images_and_fonts() -> Self {
111        Self {
112            blocked: vec![ResourceType::Image, ResourceType::Font],
113        }
114    }
115
116    /// Add a resource type to the block list.
117    #[must_use]
118    pub fn block(mut self, resource: ResourceType) -> Self {
119        if !self.blocked.contains(&resource) {
120            self.blocked.push(resource);
121        }
122        self
123    }
124
125    /// Returns `true` if the given CDP resource type string should be blocked.
126    pub fn should_block(&self, cdp_type: &str) -> bool {
127        self.blocked
128            .iter()
129            .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
130    }
131
132    /// Returns `true` if no resource types are blocked.
133    pub const fn is_empty(&self) -> bool {
134        self.blocked.is_empty()
135    }
136}
137
138// ─── WaitUntil ────────────────────────────────────────────────────────────────
139
/// Condition to wait for after a navigation.
///
/// Values can be compared with `==`, which is convenient for configuration
/// checks and tests.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
/// let w = WaitUntil::Selector("#main".to_string());
/// assert!(matches!(w, WaitUntil::Selector(_)));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WaitUntil {
    /// Wait for the `Page.domContentEventFired` CDP event — fires when the HTML
    /// document has been fully parsed and the DOM is ready, before subresources
    /// such as images and stylesheets finish loading.
    DomContentLoaded,
    /// Wait for the `Page.loadEventFired` CDP event **and** then wait until no
    /// more than 2 network requests are in-flight for at least 500 ms
    /// (the same heuristic Puppeteer calls `networkidle2`).
    NetworkIdle,
    /// Wait until `document.querySelector(selector)` returns a non-null element.
    Selector(String),
}
162
163// ─── PageHandle ───────────────────────────────────────────────────────────────
164
/// A handle to an open browser tab.
///
/// Dropping the handle spawns a background task that closes the underlying
/// page; the outcome of that close is discarded.  Call
/// [`PageHandle::close`] to observe the result instead.
///
/// # Example
///
/// ```no_run
/// use stygian_browser::{BrowserPool, BrowserConfig};
/// use stygian_browser::page::WaitUntil;
/// use std::time::Duration;
///
/// # async fn run() -> stygian_browser::error::Result<()> {
/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
/// let handle = pool.acquire().await?;
/// let mut page = handle.browser().expect("valid browser").new_page().await?;
/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
/// let html = page.content().await?;
/// drop(page); // closes the tab
/// handle.release().await;
/// # Ok(())
/// # }
/// ```
pub struct PageHandle {
    /// Underlying chromiumoxide page that all CDP commands are sent through.
    page: Page,
    /// Timeout applied to single CDP calls such as `url`, `title`, `content`,
    /// `eval` and `screenshot`.  Navigation uses its own caller-supplied
    /// timeout instead.
    cdp_timeout: Duration,
    /// HTTP status code of the most recent main-frame navigation, or `0` if not
    /// yet captured.  Reset to `0` at the start of every `navigate()` call and
    /// written atomically by the listener task that call spawns.
    last_status_code: Arc<AtomicU16>,
    /// Background task processing `Fetch.requestPaused` events. Aborted and
    /// replaced each time `set_resource_filter` is called with a non-empty
    /// filter; `None` until the first such call.
    resource_filter_task: Option<tokio::task::JoinHandle<()>>,
}
197
198impl PageHandle {
    /// Wrap a raw chromiumoxide [`Page`] in a handle.
    ///
    /// `cdp_timeout` is stored and later used to bound individual CDP calls.
    /// The new handle starts with no captured status code (stored as `0`) and
    /// no resource-filter task.
    pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
        Self {
            page,
            cdp_timeout,
            last_status_code: Arc::new(AtomicU16::new(0)),
            resource_filter_task: None,
        }
    }
208
209    /// Navigate to `url` and wait for `condition` within `nav_timeout`.
210    ///
211    /// # Errors
212    ///
213    /// Returns [`BrowserError::NavigationFailed`] if the navigation times out or
214    /// the CDP call fails.
215    pub async fn navigate(
216        &mut self,
217        url: &str,
218        condition: WaitUntil,
219        nav_timeout: Duration,
220    ) -> Result<()> {
221        self.setup_status_capture().await;
222        timeout(
223            nav_timeout,
224            self.navigate_inner(url, condition, nav_timeout),
225        )
226        .await
227        .map_err(|_| BrowserError::NavigationFailed {
228            url: url.to_string(),
229            reason: format!("navigation timed out after {nav_timeout:?}"),
230        })?
231    }
232
233    /// Reset the last status code and wire up the `Network.responseReceived`
234    /// listener before any navigation starts.  Errors are logged and swallowed
235    /// so that a missing network domain never blocks navigation.
236    async fn setup_status_capture(&self) {
237        use chromiumoxide::cdp::browser_protocol::network::{
238            EventResponseReceived, ResourceType as NetworkResourceType,
239        };
240        use futures::StreamExt;
241
242        // Reset so a stale code is not returned if the new navigation fails
243        // before the response headers arrive.
244        self.last_status_code.store(0, Ordering::Release);
245
246        // Subscribe *before* goto() — the listener runs in a detached task and
247        // stores the first Document-type response status atomically.
248        let page_for_listener = self.page.clone();
249        let status_capture = Arc::clone(&self.last_status_code);
250        match page_for_listener
251            .event_listener::<EventResponseReceived>()
252            .await
253        {
254            Ok(mut stream) => {
255                tokio::spawn(async move {
256                    while let Some(event) = stream.next().await {
257                        if event.r#type == NetworkResourceType::Document {
258                            let code = u16::try_from(event.response.status).unwrap_or(0);
259                            if code > 0 {
260                                status_capture.store(code, Ordering::Release);
261                            }
262                            break;
263                        }
264                    }
265                });
266            }
267            Err(e) => warn!("status-code capture unavailable: {e}"),
268        }
269    }
270
271    /// Subscribe to the appropriate CDP events, fire `goto`, then await
272    /// `condition`.  All subscriptions precede `goto` to eliminate the race
273    /// described in issue #7.
274    async fn navigate_inner(
275        &self,
276        url: &str,
277        condition: WaitUntil,
278        nav_timeout: Duration,
279    ) -> Result<()> {
280        use chromiumoxide::cdp::browser_protocol::page::{
281            EventDomContentEventFired, EventLoadEventFired,
282        };
283        use futures::StreamExt;
284
285        let url_owned = url.to_string();
286
287        let mut dom_events = match &condition {
288            WaitUntil::DomContentLoaded => Some(
289                self.page
290                    .event_listener::<EventDomContentEventFired>()
291                    .await
292                    .map_err(|e| BrowserError::NavigationFailed {
293                        url: url_owned.clone(),
294                        reason: e.to_string(),
295                    })?,
296            ),
297            _ => None,
298        };
299
300        let mut load_events = match &condition {
301            WaitUntil::NetworkIdle => Some(
302                self.page
303                    .event_listener::<EventLoadEventFired>()
304                    .await
305                    .map_err(|e| BrowserError::NavigationFailed {
306                        url: url_owned.clone(),
307                        reason: e.to_string(),
308                    })?,
309            ),
310            _ => None,
311        };
312
313        let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
314            Some(self.subscribe_inflight_counter().await)
315        } else {
316            None
317        };
318
319        self.page
320            .goto(url)
321            .await
322            .map_err(|e| BrowserError::NavigationFailed {
323                url: url_owned.clone(),
324                reason: e.to_string(),
325            })?;
326
327        match &condition {
328            WaitUntil::DomContentLoaded => {
329                if let Some(ref mut events) = dom_events {
330                    let _ = events.next().await;
331                }
332            }
333            WaitUntil::NetworkIdle => {
334                if let Some(ref mut events) = load_events {
335                    let _ = events.next().await;
336                }
337                if let Some(ref counter) = inflight {
338                    Self::wait_network_idle(counter).await;
339                }
340            }
341            WaitUntil::Selector(css) => {
342                self.wait_for_selector(css, nav_timeout).await?;
343            }
344        }
345        Ok(())
346    }
347
348    /// Spawn three detached tasks that maintain a signed in-flight request
349    /// counter via `Network.requestWillBeSent` (+1) and
350    /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
351    /// Returns the shared counter so the caller can poll it.
352    async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
353        use std::sync::atomic::AtomicI32;
354
355        use chromiumoxide::cdp::browser_protocol::network::{
356            EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
357        };
358        use futures::StreamExt;
359
360        let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
361        let pairs: [(Arc<AtomicI32>, i32); 3] = [
362            (Arc::clone(&counter), 1),
363            (Arc::clone(&counter), -1),
364            (Arc::clone(&counter), -1),
365        ];
366        let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
367
368        macro_rules! spawn_tracker {
369            ($page:expr, $event:ty, $c:expr, $delta:expr) => {
370                match $page.event_listener::<$event>().await {
371                    Ok(mut s) => {
372                        let c = $c;
373                        let d = $delta;
374                        tokio::spawn(async move {
375                            while s.next().await.is_some() {
376                                c.fetch_add(d, Ordering::Relaxed);
377                            }
378                        });
379                    }
380                    Err(e) => warn!("network-idle tracker unavailable: {e}"),
381                }
382            };
383        }
384
385        let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
386        spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
387        spawn_tracker!(p2, EventLoadingFinished, c2, d2);
388        spawn_tracker!(p3, EventLoadingFailed, c3, d3);
389
390        counter
391    }
392
393    /// Poll `counter` until ≤ 2 in-flight requests persist for 500 ms
394    /// (equivalent to Playwright's `networkidle2`).
395    async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
396        const IDLE_THRESHOLD: i32 = 2;
397        const SETTLE: Duration = Duration::from_millis(500);
398        loop {
399            if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
400                tokio::time::sleep(SETTLE).await;
401                if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
402                    break;
403                }
404            } else {
405                tokio::time::sleep(Duration::from_millis(50)).await;
406            }
407        }
408    }
409
410    /// Wait until `document.querySelector(selector)` is non-null (`timeout`).
411    ///
412    /// # Errors
413    ///
414    /// Returns [`BrowserError::NavigationFailed`] if the selector is not found
415    /// within the given timeout.
416    pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
417        let selector_owned = selector.to_string();
418        let poll = async {
419            loop {
420                if self.page.find_element(selector_owned.clone()).await.is_ok() {
421                    return Ok(());
422                }
423                tokio::time::sleep(Duration::from_millis(100)).await;
424            }
425        };
426
427        timeout(wait_timeout, poll)
428            .await
429            .map_err(|_| BrowserError::NavigationFailed {
430                url: String::new(),
431                reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
432            })?
433    }
434
435    /// Set a resource filter to block specific network request types.
436    ///
437    /// Enables `Fetch` interception and spawns a background task that continues
438    /// allowed requests and fails blocked ones with `BlockedByClient`. Any
439    /// previously set filter task is cancelled first.
440    ///
441    /// # Errors
442    ///
443    /// Returns a [`BrowserError::CdpError`] if the CDP call fails.
444    pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
445        use chromiumoxide::cdp::browser_protocol::fetch::{
446            ContinueRequestParams, EnableParams, EventRequestPaused, FailRequestParams,
447            RequestPattern,
448        };
449        use chromiumoxide::cdp::browser_protocol::network::ErrorReason;
450        use futures::StreamExt as _;
451
452        if filter.is_empty() {
453            return Ok(());
454        }
455
456        // Cancel any previously running filter task.
457        if let Some(task) = self.resource_filter_task.take() {
458            task.abort();
459        }
460
461        let pattern = RequestPattern::builder().url_pattern("*").build();
462        let params = EnableParams::builder()
463            .patterns(vec![pattern])
464            .handle_auth_requests(false)
465            .build();
466
467        timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
468            .await
469            .map_err(|_| BrowserError::Timeout {
470                operation: "Fetch.enable".to_string(),
471                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
472            })?
473            .map_err(|e| BrowserError::CdpError {
474                operation: "Fetch.enable".to_string(),
475                message: e.to_string(),
476            })?;
477
478        // Subscribe to requestPaused events and dispatch each one so navigation
479        // is never blocked. Without this handler Chrome holds every intercepted
480        // request indefinitely and the page hangs.
481        let mut events = self
482            .page
483            .event_listener::<EventRequestPaused>()
484            .await
485            .map_err(|e| BrowserError::CdpError {
486                operation: "Fetch.requestPaused subscribe".to_string(),
487                message: e.to_string(),
488            })?;
489
490        let page = self.page.clone();
491        debug!("Resource filter active: {:?}", filter);
492        let task = tokio::spawn(async move {
493            while let Some(event) = events.next().await {
494                let request_id = event.request_id.clone();
495                if filter.should_block(event.resource_type.as_ref()) {
496                    let params = FailRequestParams::new(request_id, ErrorReason::BlockedByClient);
497                    let _ = page.execute(params).await;
498                } else {
499                    let _ = page.execute(ContinueRequestParams::new(request_id)).await;
500                }
501            }
502        });
503
504        self.resource_filter_task = Some(task);
505        Ok(())
506    }
507
508    /// Return the current page URL (post-navigation, post-redirect).
509    ///
510    /// Delegates to the CDP `Target.getTargetInfo` binding already used
511    /// internally by [`save_cookies`](Self::save_cookies); no extra network
512    /// request is made.  Returns an empty string if the URL is not yet set
513    /// (e.g. on a blank tab before the first navigation).
514    ///
515    /// # Errors
516    ///
517    /// Returns [`BrowserError::CdpError`] if the underlying CDP call fails, or
518    /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
519    ///
520    /// # Example
521    ///
522    /// ```no_run
523    /// use stygian_browser::{BrowserPool, BrowserConfig};
524    /// use stygian_browser::page::WaitUntil;
525    /// use std::time::Duration;
526    ///
527    /// # async fn run() -> stygian_browser::error::Result<()> {
528    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
529    /// let handle = pool.acquire().await?;
530    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
531    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
532    /// let url = page.url().await?;
533    /// println!("Final URL after redirects: {url}");
534    /// # Ok(())
535    /// # }
536    /// ```
537    pub async fn url(&self) -> Result<String> {
538        timeout(self.cdp_timeout, self.page.url())
539            .await
540            .map_err(|_| BrowserError::Timeout {
541                operation: "page.url".to_string(),
542                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
543            })?
544            .map_err(|e| BrowserError::CdpError {
545                operation: "page.url".to_string(),
546                message: e.to_string(),
547            })
548            .map(Option::unwrap_or_default)
549    }
550
551    /// Return the HTTP status code of the most recent main-frame navigation.
552    ///
553    /// The status is captured from the `Network.responseReceived` CDP event
554    /// wired up inside [`navigate`](Self::navigate), so it reflects the
555    /// *final* response after any server-side redirects.
556    ///
557    /// Returns `None` if the status was not captured — for example on `file://`
558    /// navigations, when [`navigate`](Self::navigate) has not yet been called,
559    /// or if the network event subscription failed.
560    ///
561    /// # Errors
562    ///
563    /// This method is infallible; the `Result` wrapper is kept for API
564    /// consistency with other `PageHandle` methods.
565    ///
566    /// # Example
567    ///
568    /// ```no_run
569    /// use stygian_browser::{BrowserPool, BrowserConfig};
570    /// use stygian_browser::page::WaitUntil;
571    /// use std::time::Duration;
572    ///
573    /// # async fn run() -> stygian_browser::error::Result<()> {
574    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
575    /// let handle = pool.acquire().await?;
576    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
577    /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
578    /// if let Some(code) = page.status_code()? {
579    ///     println!("HTTP {code}");
580    /// }
581    /// # Ok(())
582    /// # }
583    /// ```
584    pub fn status_code(&self) -> Result<Option<u16>> {
585        let code = self.last_status_code.load(Ordering::Acquire);
586        Ok(if code == 0 { None } else { Some(code) })
587    }
588
589    /// Return the page's `<title>` text.
590    ///
591    /// # Errors
592    ///
593    /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
594    pub async fn title(&self) -> Result<String> {
595        timeout(self.cdp_timeout, self.page.get_title())
596            .await
597            .map_err(|_| BrowserError::Timeout {
598                operation: "get_title".to_string(),
599                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
600            })?
601            .map_err(|e| BrowserError::ScriptExecutionFailed {
602                script: "document.title".to_string(),
603                reason: e.to_string(),
604            })
605            .map(Option::unwrap_or_default)
606    }
607
608    /// Return the page's full outer HTML.
609    ///
610    /// # Errors
611    ///
612    /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
613    pub async fn content(&self) -> Result<String> {
614        timeout(self.cdp_timeout, self.page.content())
615            .await
616            .map_err(|_| BrowserError::Timeout {
617                operation: "page.content".to_string(),
618                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
619            })?
620            .map_err(|e| BrowserError::ScriptExecutionFailed {
621                script: "document.documentElement.outerHTML".to_string(),
622                reason: e.to_string(),
623            })
624    }
625
626    /// Evaluate arbitrary JavaScript and return the result as `T`.
627    ///
628    /// # Errors
629    ///
630    /// Returns [`BrowserError::ScriptExecutionFailed`] on eval failure or
631    /// deserialization error.
632    pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
633        let script_owned = script.to_string();
634        timeout(self.cdp_timeout, self.page.evaluate(script))
635            .await
636            .map_err(|_| BrowserError::Timeout {
637                operation: "page.evaluate".to_string(),
638                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
639            })?
640            .map_err(|e| BrowserError::ScriptExecutionFailed {
641                script: script_owned.clone(),
642                reason: e.to_string(),
643            })?
644            .into_value::<T>()
645            .map_err(|e| BrowserError::ScriptExecutionFailed {
646                script: script_owned,
647                reason: e.to_string(),
648            })
649    }
650
651    /// Save all cookies for the current page's origin.
652    ///
653    /// # Errors
654    ///
655    /// Returns [`BrowserError::CdpError`] if the CDP call fails.
656    pub async fn save_cookies(
657        &self,
658    ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
659        use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
660
661        let url = self
662            .page
663            .url()
664            .await
665            .map_err(|e| BrowserError::CdpError {
666                operation: "page.url".to_string(),
667                message: e.to_string(),
668            })?
669            .unwrap_or_default();
670
671        timeout(
672            self.cdp_timeout,
673            self.page
674                .execute(GetCookiesParams::builder().urls(vec![url]).build()),
675        )
676        .await
677        .map_err(|_| BrowserError::Timeout {
678            operation: "Network.getCookies".to_string(),
679            duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
680        })?
681        .map_err(|e| BrowserError::CdpError {
682            operation: "Network.getCookies".to_string(),
683            message: e.to_string(),
684        })
685        .map(|r| r.cookies.clone())
686    }
687
688    /// Inject cookies into the current page.
689    ///
690    /// Seeds session tokens or other state without needing a full
691    /// [`SessionSnapshot`][crate::session::SessionSnapshot] and without
692    /// requiring a direct `chromiumoxide` dependency in calling code.
693    ///
694    /// Individual cookie failures are logged as warnings and do not abort the
695    /// remaining cookies.
696    ///
697    /// # Errors
698    ///
699    /// Returns [`BrowserError::Timeout`] if a single `Network.setCookie` CDP
700    /// call exceeds `cdp_timeout`.
701    ///
702    /// # Example
703    ///
704    /// ```no_run
705    /// use stygian_browser::{BrowserPool, BrowserConfig};
706    /// use stygian_browser::session::SessionCookie;
707    /// use std::time::Duration;
708    ///
709    /// # async fn run() -> stygian_browser::error::Result<()> {
710    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
711    /// let handle = pool.acquire().await?;
712    /// let page = handle.browser().expect("valid browser").new_page().await?;
713    /// let cookies = vec![SessionCookie {
714    ///     name: "session".to_string(),
715    ///     value: "abc123".to_string(),
716    ///     domain: ".example.com".to_string(),
717    ///     path: "/".to_string(),
718    ///     expires: -1.0,
719    ///     http_only: true,
720    ///     secure: true,
721    ///     same_site: "Lax".to_string(),
722    /// }];
723    /// page.inject_cookies(&cookies).await?;
724    /// # Ok(())
725    /// # }
726    /// ```
727    pub async fn inject_cookies(&self, cookies: &[crate::session::SessionCookie]) -> Result<()> {
728        use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
729
730        for cookie in cookies {
731            let params = match SetCookieParams::builder()
732                .name(cookie.name.clone())
733                .value(cookie.value.clone())
734                .domain(cookie.domain.clone())
735                .path(cookie.path.clone())
736                .http_only(cookie.http_only)
737                .secure(cookie.secure)
738                .build()
739            {
740                Ok(p) => p,
741                Err(e) => {
742                    warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
743                    continue;
744                }
745            };
746
747            match timeout(self.cdp_timeout, self.page.execute(params)).await {
748                Err(_) => {
749                    warn!(
750                        cookie = %cookie.name,
751                        timeout_ms = self.cdp_timeout.as_millis(),
752                        "Timed out injecting cookie"
753                    );
754                }
755                Ok(Err(e)) => {
756                    warn!(cookie = %cookie.name, error = %e, "Failed to inject cookie");
757                }
758                Ok(Ok(_)) => {}
759            }
760        }
761
762        debug!(count = cookies.len(), "Cookies injected");
763        Ok(())
764    }
765
766    /// Capture a screenshot of the current page as PNG bytes.
767    ///
768    /// The screenshot is full-page by default (viewport clipped to the rendered
769    /// layout area).  Save the returned bytes to a `.png` file or process
770    /// them in-memory.
771    ///
772    /// # Errors
773    ///
774    /// Returns [`BrowserError::CdpError`] if the CDP `Page.captureScreenshot`
775    /// command fails, or [`BrowserError::Timeout`] if it exceeds
776    /// `cdp_timeout`.
777    ///
778    /// # Example
779    ///
780    /// ```no_run
781    /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
782    /// use std::{time::Duration, fs};
783    ///
784    /// # async fn run() -> stygian_browser::error::Result<()> {
785    /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
786    /// let handle = pool.acquire().await?;
787    /// let mut page = handle.browser().expect("valid browser").new_page().await?;
788    /// page.navigate("https://example.com", WaitUntil::Selector("body".to_string()), Duration::from_secs(30)).await?;
789    /// let png = page.screenshot().await?;
790    /// fs::write("screenshot.png", &png).unwrap();
791    /// # Ok(())
792    /// # }
793    /// ```
794    pub async fn screenshot(&self) -> Result<Vec<u8>> {
795        use chromiumoxide::page::ScreenshotParams;
796
797        let params = ScreenshotParams::builder().full_page(true).build();
798
799        timeout(self.cdp_timeout, self.page.screenshot(params))
800            .await
801            .map_err(|_| BrowserError::Timeout {
802                operation: "Page.captureScreenshot".to_string(),
803                duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
804            })?
805            .map_err(|e| BrowserError::CdpError {
806                operation: "Page.captureScreenshot".to_string(),
807                message: e.to_string(),
808            })
809    }
810
    /// Borrow the underlying chromiumoxide [`Page`].
    ///
    /// Calls made directly on the returned page bypass this handle's wrappers,
    /// so they are not bounded by `cdp_timeout` and their errors are not
    /// mapped to [`BrowserError`].
    pub const fn inner(&self) -> &Page {
        &self.page
    }
815
816    /// Close this page (tab).
817    ///
818    /// Called automatically on drop; explicit call avoids suppressing the error.
819    pub async fn close(self) -> Result<()> {
820        timeout(Duration::from_secs(5), self.page.clone().close())
821            .await
822            .map_err(|_| BrowserError::Timeout {
823                operation: "page.close".to_string(),
824                duration_ms: 5000,
825            })?
826            .map_err(|e| BrowserError::CdpError {
827                operation: "page.close".to_string(),
828                message: e.to_string(),
829            })
830    }
831}
832
833impl Drop for PageHandle {
834    fn drop(&mut self) {
835        warn!("PageHandle dropped without explicit close(); spawning cleanup task");
836        // chromiumoxide Page does not implement close on Drop, so we spawn
837        // a fire-and-forget task. The page ref is already owned; we need to
838        // swap it out. We clone the Page handle (it's Arc-backed internally).
839        let page = self.page.clone();
840        tokio::spawn(async move {
841            let _ = page.close().await;
842        });
843    }
844}
845
846// ─── Tests ────────────────────────────────────────────────────────────────────
847
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes a raw status code the same way the status-code tests expect:
    /// `0` means "not yet captured" and becomes `None`.
    fn decode_status(raw: u16) -> Option<u16> {
        Some(raw).filter(|&code| code != 0)
    }

    /// `block_media()` blocks Image, Font, Stylesheet and Media, but not
    /// Script or XHR.
    #[test]
    fn resource_filter_block_media_blocks_image() {
        let media = ResourceFilter::block_media();

        let blocked = ["Image", "Font", "Stylesheet", "Media"];
        assert!(blocked.iter().all(|&kind| media.should_block(kind)));

        let allowed = ["Script", "XHR"];
        assert!(!allowed.iter().any(|&kind| media.should_block(kind)));
    }

    /// Matching ignores the letter case of the resource type string.
    #[test]
    fn resource_filter_case_insensitive() {
        let images_and_fonts = ResourceFilter::block_images_and_fonts();

        // Lowercase and uppercase spellings must both match.
        let spellings = ["image", "IMAGE"];
        assert!(spellings
            .iter()
            .all(|&kind| images_and_fonts.should_block(kind)));

        assert!(!images_and_fonts.should_block("Stylesheet"));
    }

    /// Chained `block` calls accumulate the blocked types.
    #[test]
    fn resource_filter_builder_chain() {
        let images_only = ResourceFilter::default().block(ResourceType::Image);
        let built = images_only.block(ResourceType::Font);

        assert!(["Image", "Font"].iter().all(|&kind| built.should_block(kind)));
        assert!(!built.should_block("Stylesheet"));
    }

    /// Blocking the same type twice stores it only once.
    #[test]
    fn resource_filter_dedup_block() {
        let twice = ResourceFilter::default()
            .block(ResourceType::Image)
            .block(ResourceType::Image); // duplicate
        let stored = twice.blocked.len();
        assert_eq!(stored, 1);
    }

    /// A default filter is empty; `block_media()` is not.
    #[test]
    fn resource_filter_is_empty_when_default() {
        let untouched = ResourceFilter::default();
        let media = ResourceFilter::block_media();
        assert!(untouched.is_empty());
        assert!(!media.is_empty());
    }

    /// `WaitUntil::Selector` keeps the CSS selector it was built with.
    #[test]
    fn wait_until_selector_stores_string() {
        let wait = WaitUntil::Selector("#foo".to_string());
        if let WaitUntil::Selector(css) = wait {
            assert_eq!(css, "#foo");
        } else {
            panic!("expected WaitUntil::Selector");
        }
    }

    /// Each `ResourceType` maps to its CDP resource-type string.
    #[test]
    fn resource_type_cdp_str() {
        let expected = [
            (ResourceType::Image, "Image"),
            (ResourceType::Font, "Font"),
            (ResourceType::Stylesheet, "Stylesheet"),
            (ResourceType::Media, "Media"),
        ];
        for (resource, cdp_name) in expected {
            assert_eq!(resource.as_cdp_str(), cdp_name);
        }
    }

    /// `PageHandle` must be `Send + Sync` for use across thread boundaries.
    #[test]
    fn page_handle_is_send_sync() {
        // Compile-time check only: instantiating the helper fails to build
        // if either bound is missing.
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<PageHandle>();
    }

    /// The status-code sentinel (0 = "not yet captured") and the conversion to
    /// `Option<u16>` are pure-logic invariants testable without a live browser.
    #[test]
    fn status_code_sentinel_zero_maps_to_none() {
        use std::sync::atomic::{AtomicU16, Ordering};
        let raw = AtomicU16::new(0).load(Ordering::Acquire);
        assert_eq!(decode_status(raw), None::<u16>);
    }

    /// Any non-zero stored code is reported back unchanged as `Some(code)`.
    #[test]
    fn status_code_non_zero_maps_to_some() {
        use std::sync::atomic::{AtomicU16, Ordering};
        for &expected in &[200u16, 301, 404, 503] {
            let raw = AtomicU16::new(expected).load(Ordering::Acquire);
            assert_eq!(decode_status(raw), Some(expected));
        }
    }
}