stygian_browser/page.rs
1//! Page and browsing context management for isolated, parallel scraping
2//!
3//! Each `BrowserContext` (future) is an incognito-style isolation boundary (separate
4//! cookies, localStorage, cache). Each context can contain many [`PageHandle`]s
5//! (tabs). Both types clean up their CDP resources automatically on drop.
6//!
7//! ## Resource blocking
8//!
9//! Pass a [`ResourceFilter`] to [`PageHandle::set_resource_filter`] to intercept
10//! and block specific request types (images, fonts, CSS) before page load —
11//! significantly reducing page load times for text-only scraping.
12//!
13//! ## Wait strategies
14//!
15//! [`PageHandle`] exposes three wait strategies via [`WaitUntil`]:
16//! - `DomContentLoaded` — fires when the HTML is parsed
17//! - `NetworkIdle` — fires when there are ≤2 in-flight requests for 500 ms
18//! - `Selector(css)` — fires when a CSS selector matches an element
19//!
20//! # Example
21//!
22//! ```no_run
23//! use stygian_browser::{BrowserPool, BrowserConfig};
24//! use stygian_browser::page::{ResourceFilter, WaitUntil};
25//! use std::time::Duration;
26//!
27//! # async fn run() -> stygian_browser::error::Result<()> {
28//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
29//! let handle = pool.acquire().await?;
30//!
31//! let mut page = handle.browser().expect("valid browser").new_page().await?;
32//! page.set_resource_filter(ResourceFilter::block_media()).await?;
33//! page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
34//! let title = page.title().await?;
35//! println!("title: {title}");
36//! handle.release().await;
37//! # Ok(())
38//! # }
39//! ```
40
41use std::sync::{
42 Arc,
43 atomic::{AtomicU16, Ordering},
44};
45use std::time::Duration;
46
47use chromiumoxide::Page;
48use tokio::time::timeout;
49use tracing::{debug, warn};
50
51use crate::error::{BrowserError, Result};
52
53// ─── ResourceType ─────────────────────────────────────────────────────────────
54
/// Categories of sub-resource requests that a [`ResourceFilter`] can block.
///
/// Each variant corresponds to one CDP resource-type name; see
/// [`ResourceType::as_cdp_str`] for the exact spelling.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResourceType {
    /// `<img>`, `<picture>`, background images
    Image,
    /// Web fonts loaded via CSS `@font-face`
    Font,
    /// External CSS stylesheets
    Stylesheet,
    /// Media files (audio/video)
    Media,
}

impl ResourceType {
    /// Returns the CDP resource-type name for this variant.
    ///
    /// [`ResourceFilter::should_block`] compares this name, ignoring ASCII
    /// case, against the type string of an intercepted request.
    pub const fn as_cdp_str(&self) -> &'static str {
        match *self {
            Self::Media => "Media",
            Self::Stylesheet => "Stylesheet",
            Self::Font => "Font",
            Self::Image => "Image",
        }
    }
}
79
80// ─── ResourceFilter ───────────────────────────────────────────────────────────
81
82/// Set of resource types to block from loading.
83///
84/// # Example
85///
86/// ```
87/// use stygian_browser::page::ResourceFilter;
88/// let filter = ResourceFilter::block_media();
89/// assert!(filter.should_block("Image"));
90/// ```
91#[derive(Debug, Clone, Default)]
92pub struct ResourceFilter {
93 blocked: Vec<ResourceType>,
94}
95
96impl ResourceFilter {
97 /// Block all media resources (images, fonts, CSS, audio/video).
98 pub fn block_media() -> Self {
99 Self {
100 blocked: vec![
101 ResourceType::Image,
102 ResourceType::Font,
103 ResourceType::Stylesheet,
104 ResourceType::Media,
105 ],
106 }
107 }
108
109 /// Block only images and fonts (keep styles for layout-sensitive work).
110 pub fn block_images_and_fonts() -> Self {
111 Self {
112 blocked: vec![ResourceType::Image, ResourceType::Font],
113 }
114 }
115
116 /// Add a resource type to the block list.
117 #[must_use]
118 pub fn block(mut self, resource: ResourceType) -> Self {
119 if !self.blocked.contains(&resource) {
120 self.blocked.push(resource);
121 }
122 self
123 }
124
125 /// Returns `true` if the given CDP resource type string should be blocked.
126 pub fn should_block(&self, cdp_type: &str) -> bool {
127 self.blocked
128 .iter()
129 .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
130 }
131
132 /// Returns `true` if no resource types are blocked.
133 pub const fn is_empty(&self) -> bool {
134 self.blocked.is_empty()
135 }
136}
137
138// ─── WaitUntil ────────────────────────────────────────────────────────────────
139
/// Condition to wait for after a navigation.
///
/// Passed to [`PageHandle::navigate`], which subscribes to whatever CDP events
/// the chosen condition needs before it issues the navigation, and then waits
/// for the condition within the navigation timeout.
///
/// # Example
///
/// ```
/// use stygian_browser::page::WaitUntil;
/// let w = WaitUntil::Selector("#main".to_string());
/// assert!(matches!(w, WaitUntil::Selector(_)));
/// ```
#[derive(Debug, Clone)]
pub enum WaitUntil {
    /// Wait for the `Page.domContentEventFired` CDP event — fires when the HTML
    /// document has been fully parsed and the DOM is ready, before subresources
    /// such as images and stylesheets finish loading.
    DomContentLoaded,
    /// Wait for the `Page.loadEventFired` CDP event **and** then wait until no
    /// more than 2 network requests are in-flight for at least 500 ms
    /// (equivalent to Playwright's `networkidle2`).
    NetworkIdle,
    /// Wait until the given CSS selector matches an element. The page is
    /// polled repeatedly until a match is found or the timeout elapses.
    Selector(String),
}
162
163// ─── PageHandle ───────────────────────────────────────────────────────────────
164
/// A handle to an open browser tab.
///
/// On drop the underlying page is closed automatically: the `Drop`
/// implementation spawns a background task that closes the tab and discards
/// any error. Call [`PageHandle::close`] instead to observe the result.
///
/// # Example
///
/// ```no_run
/// use stygian_browser::{BrowserPool, BrowserConfig};
/// use stygian_browser::page::WaitUntil;
/// use std::time::Duration;
///
/// # async fn run() -> stygian_browser::error::Result<()> {
/// let pool = BrowserPool::new(BrowserConfig::default()).await?;
/// let handle = pool.acquire().await?;
/// let mut page = handle.browser().expect("valid browser").new_page().await?;
/// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
/// let html = page.content().await?;
/// drop(page); // closes the tab
/// handle.release().await;
/// # Ok(())
/// # }
/// ```
pub struct PageHandle {
    /// The wrapped chromiumoxide page that all CDP calls are issued against.
    page: Page,
    /// Deadline applied to individual CDP calls made through this handle
    /// (for example `title`, `content`, `eval` and `screenshot`).
    cdp_timeout: Duration,
    /// HTTP status code of the most recent main-frame navigation, or `0` if not
    /// yet captured. Written atomically by the listener task that `navigate()`
    /// sets up before the navigation starts.
    last_status_code: Arc<AtomicU16>,
    /// Background task processing `Fetch.requestPaused` events. Aborted and
    /// replaced each time `set_resource_filter` is called with a non-empty
    /// filter; `None` until a filter has been installed.
    resource_filter_task: Option<tokio::task::JoinHandle<()>>,
}
197
198impl PageHandle {
199 /// Wrap a raw chromiumoxide [`Page`] in a handle.
200 pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
201 Self {
202 page,
203 cdp_timeout,
204 last_status_code: Arc::new(AtomicU16::new(0)),
205 resource_filter_task: None,
206 }
207 }
208
209 /// Navigate to `url` and wait for `condition` within `nav_timeout`.
210 ///
211 /// # Errors
212 ///
213 /// Returns [`BrowserError::NavigationFailed`] if the navigation times out or
214 /// the CDP call fails.
215 pub async fn navigate(
216 &mut self,
217 url: &str,
218 condition: WaitUntil,
219 nav_timeout: Duration,
220 ) -> Result<()> {
221 self.setup_status_capture().await;
222 timeout(
223 nav_timeout,
224 self.navigate_inner(url, condition, nav_timeout),
225 )
226 .await
227 .map_err(|_| BrowserError::NavigationFailed {
228 url: url.to_string(),
229 reason: format!("navigation timed out after {nav_timeout:?}"),
230 })?
231 }
232
233 /// Reset the last status code and wire up the `Network.responseReceived`
234 /// listener before any navigation starts. Errors are logged and swallowed
235 /// so that a missing network domain never blocks navigation.
236 async fn setup_status_capture(&self) {
237 use chromiumoxide::cdp::browser_protocol::network::{
238 EventResponseReceived, ResourceType as NetworkResourceType,
239 };
240 use futures::StreamExt;
241
242 // Reset so a stale code is not returned if the new navigation fails
243 // before the response headers arrive.
244 self.last_status_code.store(0, Ordering::Release);
245
246 // Subscribe *before* goto() — the listener runs in a detached task and
247 // stores the first Document-type response status atomically.
248 let page_for_listener = self.page.clone();
249 let status_capture = Arc::clone(&self.last_status_code);
250 match page_for_listener
251 .event_listener::<EventResponseReceived>()
252 .await
253 {
254 Ok(mut stream) => {
255 tokio::spawn(async move {
256 while let Some(event) = stream.next().await {
257 if event.r#type == NetworkResourceType::Document {
258 let code = u16::try_from(event.response.status).unwrap_or(0);
259 if code > 0 {
260 status_capture.store(code, Ordering::Release);
261 }
262 break;
263 }
264 }
265 });
266 }
267 Err(e) => warn!("status-code capture unavailable: {e}"),
268 }
269 }
270
271 /// Subscribe to the appropriate CDP events, fire `goto`, then await
272 /// `condition`. All subscriptions precede `goto` to eliminate the race
273 /// described in issue #7.
274 async fn navigate_inner(
275 &self,
276 url: &str,
277 condition: WaitUntil,
278 nav_timeout: Duration,
279 ) -> Result<()> {
280 use chromiumoxide::cdp::browser_protocol::page::{
281 EventDomContentEventFired, EventLoadEventFired,
282 };
283 use futures::StreamExt;
284
285 let url_owned = url.to_string();
286
287 let mut dom_events = match &condition {
288 WaitUntil::DomContentLoaded => Some(
289 self.page
290 .event_listener::<EventDomContentEventFired>()
291 .await
292 .map_err(|e| BrowserError::NavigationFailed {
293 url: url_owned.clone(),
294 reason: e.to_string(),
295 })?,
296 ),
297 _ => None,
298 };
299
300 let mut load_events = match &condition {
301 WaitUntil::NetworkIdle => Some(
302 self.page
303 .event_listener::<EventLoadEventFired>()
304 .await
305 .map_err(|e| BrowserError::NavigationFailed {
306 url: url_owned.clone(),
307 reason: e.to_string(),
308 })?,
309 ),
310 _ => None,
311 };
312
313 let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
314 Some(self.subscribe_inflight_counter().await)
315 } else {
316 None
317 };
318
319 self.page
320 .goto(url)
321 .await
322 .map_err(|e| BrowserError::NavigationFailed {
323 url: url_owned.clone(),
324 reason: e.to_string(),
325 })?;
326
327 match &condition {
328 WaitUntil::DomContentLoaded => {
329 if let Some(ref mut events) = dom_events {
330 let _ = events.next().await;
331 }
332 }
333 WaitUntil::NetworkIdle => {
334 if let Some(ref mut events) = load_events {
335 let _ = events.next().await;
336 }
337 if let Some(ref counter) = inflight {
338 Self::wait_network_idle(counter).await;
339 }
340 }
341 WaitUntil::Selector(css) => {
342 self.wait_for_selector(css, nav_timeout).await?;
343 }
344 }
345 Ok(())
346 }
347
348 /// Spawn three detached tasks that maintain a signed in-flight request
349 /// counter via `Network.requestWillBeSent` (+1) and
350 /// `Network.loadingFinished`/`Network.loadingFailed` (−1 each).
351 /// Returns the shared counter so the caller can poll it.
352 async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
353 use std::sync::atomic::AtomicI32;
354
355 use chromiumoxide::cdp::browser_protocol::network::{
356 EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
357 };
358 use futures::StreamExt;
359
360 let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
361 let pairs: [(Arc<AtomicI32>, i32); 3] = [
362 (Arc::clone(&counter), 1),
363 (Arc::clone(&counter), -1),
364 (Arc::clone(&counter), -1),
365 ];
366 let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
367
368 macro_rules! spawn_tracker {
369 ($page:expr, $event:ty, $c:expr, $delta:expr) => {
370 match $page.event_listener::<$event>().await {
371 Ok(mut s) => {
372 let c = $c;
373 let d = $delta;
374 tokio::spawn(async move {
375 while s.next().await.is_some() {
376 c.fetch_add(d, Ordering::Relaxed);
377 }
378 });
379 }
380 Err(e) => warn!("network-idle tracker unavailable: {e}"),
381 }
382 };
383 }
384
385 let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
386 spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
387 spawn_tracker!(p2, EventLoadingFinished, c2, d2);
388 spawn_tracker!(p3, EventLoadingFailed, c3, d3);
389
390 counter
391 }
392
393 /// Poll `counter` until ≤ 2 in-flight requests persist for 500 ms
394 /// (equivalent to Playwright's `networkidle2`).
395 async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
396 const IDLE_THRESHOLD: i32 = 2;
397 const SETTLE: Duration = Duration::from_millis(500);
398 loop {
399 if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
400 tokio::time::sleep(SETTLE).await;
401 if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
402 break;
403 }
404 } else {
405 tokio::time::sleep(Duration::from_millis(50)).await;
406 }
407 }
408 }
409
410 /// Wait until `document.querySelector(selector)` is non-null (`timeout`).
411 ///
412 /// # Errors
413 ///
414 /// Returns [`BrowserError::NavigationFailed`] if the selector is not found
415 /// within the given timeout.
416 pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
417 let selector_owned = selector.to_string();
418 let poll = async {
419 loop {
420 if self.page.find_element(selector_owned.clone()).await.is_ok() {
421 return Ok(());
422 }
423 tokio::time::sleep(Duration::from_millis(100)).await;
424 }
425 };
426
427 timeout(wait_timeout, poll)
428 .await
429 .map_err(|_| BrowserError::NavigationFailed {
430 url: String::new(),
431 reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
432 })?
433 }
434
435 /// Set a resource filter to block specific network request types.
436 ///
437 /// Enables `Fetch` interception and spawns a background task that continues
438 /// allowed requests and fails blocked ones with `BlockedByClient`. Any
439 /// previously set filter task is cancelled first.
440 ///
441 /// # Errors
442 ///
443 /// Returns a [`BrowserError::CdpError`] if the CDP call fails.
444 pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
445 use chromiumoxide::cdp::browser_protocol::fetch::{
446 ContinueRequestParams, EnableParams, EventRequestPaused, FailRequestParams,
447 RequestPattern,
448 };
449 use chromiumoxide::cdp::browser_protocol::network::ErrorReason;
450 use futures::StreamExt as _;
451
452 if filter.is_empty() {
453 return Ok(());
454 }
455
456 // Cancel any previously running filter task.
457 if let Some(task) = self.resource_filter_task.take() {
458 task.abort();
459 }
460
461 let pattern = RequestPattern::builder().url_pattern("*").build();
462 let params = EnableParams::builder()
463 .patterns(vec![pattern])
464 .handle_auth_requests(false)
465 .build();
466
467 timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
468 .await
469 .map_err(|_| BrowserError::Timeout {
470 operation: "Fetch.enable".to_string(),
471 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
472 })?
473 .map_err(|e| BrowserError::CdpError {
474 operation: "Fetch.enable".to_string(),
475 message: e.to_string(),
476 })?;
477
478 // Subscribe to requestPaused events and dispatch each one so navigation
479 // is never blocked. Without this handler Chrome holds every intercepted
480 // request indefinitely and the page hangs.
481 let mut events = self
482 .page
483 .event_listener::<EventRequestPaused>()
484 .await
485 .map_err(|e| BrowserError::CdpError {
486 operation: "Fetch.requestPaused subscribe".to_string(),
487 message: e.to_string(),
488 })?;
489
490 let page = self.page.clone();
491 debug!("Resource filter active: {:?}", filter);
492 let task = tokio::spawn(async move {
493 while let Some(event) = events.next().await {
494 let request_id = event.request_id.clone();
495 if filter.should_block(event.resource_type.as_ref()) {
496 let params = FailRequestParams::new(request_id, ErrorReason::BlockedByClient);
497 let _ = page.execute(params).await;
498 } else {
499 let _ = page.execute(ContinueRequestParams::new(request_id)).await;
500 }
501 }
502 });
503
504 self.resource_filter_task = Some(task);
505 Ok(())
506 }
507
508 /// Return the current page URL (post-navigation, post-redirect).
509 ///
510 /// Delegates to the CDP `Target.getTargetInfo` binding already used
511 /// internally by [`save_cookies`](Self::save_cookies); no extra network
512 /// request is made. Returns an empty string if the URL is not yet set
513 /// (e.g. on a blank tab before the first navigation).
514 ///
515 /// # Errors
516 ///
517 /// Returns [`BrowserError::CdpError`] if the underlying CDP call fails, or
518 /// [`BrowserError::Timeout`] if it exceeds `cdp_timeout`.
519 ///
520 /// # Example
521 ///
522 /// ```no_run
523 /// use stygian_browser::{BrowserPool, BrowserConfig};
524 /// use stygian_browser::page::WaitUntil;
525 /// use std::time::Duration;
526 ///
527 /// # async fn run() -> stygian_browser::error::Result<()> {
528 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
529 /// let handle = pool.acquire().await?;
530 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
531 /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
532 /// let url = page.url().await?;
533 /// println!("Final URL after redirects: {url}");
534 /// # Ok(())
535 /// # }
536 /// ```
537 pub async fn url(&self) -> Result<String> {
538 timeout(self.cdp_timeout, self.page.url())
539 .await
540 .map_err(|_| BrowserError::Timeout {
541 operation: "page.url".to_string(),
542 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
543 })?
544 .map_err(|e| BrowserError::CdpError {
545 operation: "page.url".to_string(),
546 message: e.to_string(),
547 })
548 .map(Option::unwrap_or_default)
549 }
550
551 /// Return the HTTP status code of the most recent main-frame navigation.
552 ///
553 /// The status is captured from the `Network.responseReceived` CDP event
554 /// wired up inside [`navigate`](Self::navigate), so it reflects the
555 /// *final* response after any server-side redirects.
556 ///
557 /// Returns `None` if the status was not captured — for example on `file://`
558 /// navigations, when [`navigate`](Self::navigate) has not yet been called,
559 /// or if the network event subscription failed.
560 ///
561 /// # Errors
562 ///
563 /// This method is infallible; the `Result` wrapper is kept for API
564 /// consistency with other `PageHandle` methods.
565 ///
566 /// # Example
567 ///
568 /// ```no_run
569 /// use stygian_browser::{BrowserPool, BrowserConfig};
570 /// use stygian_browser::page::WaitUntil;
571 /// use std::time::Duration;
572 ///
573 /// # async fn run() -> stygian_browser::error::Result<()> {
574 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
575 /// let handle = pool.acquire().await?;
576 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
577 /// page.navigate("https://example.com", WaitUntil::DomContentLoaded, Duration::from_secs(30)).await?;
578 /// if let Some(code) = page.status_code()? {
579 /// println!("HTTP {code}");
580 /// }
581 /// # Ok(())
582 /// # }
583 /// ```
584 pub fn status_code(&self) -> Result<Option<u16>> {
585 let code = self.last_status_code.load(Ordering::Acquire);
586 Ok(if code == 0 { None } else { Some(code) })
587 }
588
589 /// Return the page's `<title>` text.
590 ///
591 /// # Errors
592 ///
593 /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
594 pub async fn title(&self) -> Result<String> {
595 timeout(self.cdp_timeout, self.page.get_title())
596 .await
597 .map_err(|_| BrowserError::Timeout {
598 operation: "get_title".to_string(),
599 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
600 })?
601 .map_err(|e| BrowserError::ScriptExecutionFailed {
602 script: "document.title".to_string(),
603 reason: e.to_string(),
604 })
605 .map(Option::unwrap_or_default)
606 }
607
608 /// Return the page's full outer HTML.
609 ///
610 /// # Errors
611 ///
612 /// Returns [`BrowserError::ScriptExecutionFailed`] if the evaluation fails.
613 pub async fn content(&self) -> Result<String> {
614 timeout(self.cdp_timeout, self.page.content())
615 .await
616 .map_err(|_| BrowserError::Timeout {
617 operation: "page.content".to_string(),
618 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
619 })?
620 .map_err(|e| BrowserError::ScriptExecutionFailed {
621 script: "document.documentElement.outerHTML".to_string(),
622 reason: e.to_string(),
623 })
624 }
625
626 /// Evaluate arbitrary JavaScript and return the result as `T`.
627 ///
628 /// # Errors
629 ///
630 /// Returns [`BrowserError::ScriptExecutionFailed`] on eval failure or
631 /// deserialization error.
632 pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
633 let script_owned = script.to_string();
634 timeout(self.cdp_timeout, self.page.evaluate(script))
635 .await
636 .map_err(|_| BrowserError::Timeout {
637 operation: "page.evaluate".to_string(),
638 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
639 })?
640 .map_err(|e| BrowserError::ScriptExecutionFailed {
641 script: script_owned.clone(),
642 reason: e.to_string(),
643 })?
644 .into_value::<T>()
645 .map_err(|e| BrowserError::ScriptExecutionFailed {
646 script: script_owned,
647 reason: e.to_string(),
648 })
649 }
650
651 /// Save all cookies for the current page's origin.
652 ///
653 /// # Errors
654 ///
655 /// Returns [`BrowserError::CdpError`] if the CDP call fails.
656 pub async fn save_cookies(
657 &self,
658 ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
659 use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
660
661 let url = self
662 .page
663 .url()
664 .await
665 .map_err(|e| BrowserError::CdpError {
666 operation: "page.url".to_string(),
667 message: e.to_string(),
668 })?
669 .unwrap_or_default();
670
671 timeout(
672 self.cdp_timeout,
673 self.page
674 .execute(GetCookiesParams::builder().urls(vec![url]).build()),
675 )
676 .await
677 .map_err(|_| BrowserError::Timeout {
678 operation: "Network.getCookies".to_string(),
679 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
680 })?
681 .map_err(|e| BrowserError::CdpError {
682 operation: "Network.getCookies".to_string(),
683 message: e.to_string(),
684 })
685 .map(|r| r.cookies.clone())
686 }
687
688 /// Inject cookies into the current page.
689 ///
690 /// Seeds session tokens or other state without needing a full
691 /// [`SessionSnapshot`][crate::session::SessionSnapshot] and without
692 /// requiring a direct `chromiumoxide` dependency in calling code.
693 ///
694 /// Individual cookie failures are logged as warnings and do not abort the
695 /// remaining cookies.
696 ///
697 /// # Errors
698 ///
699 /// Returns [`BrowserError::Timeout`] if a single `Network.setCookie` CDP
700 /// call exceeds `cdp_timeout`.
701 ///
702 /// # Example
703 ///
704 /// ```no_run
705 /// use stygian_browser::{BrowserPool, BrowserConfig};
706 /// use stygian_browser::session::SessionCookie;
707 /// use std::time::Duration;
708 ///
709 /// # async fn run() -> stygian_browser::error::Result<()> {
710 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
711 /// let handle = pool.acquire().await?;
712 /// let page = handle.browser().expect("valid browser").new_page().await?;
713 /// let cookies = vec![SessionCookie {
714 /// name: "session".to_string(),
715 /// value: "abc123".to_string(),
716 /// domain: ".example.com".to_string(),
717 /// path: "/".to_string(),
718 /// expires: -1.0,
719 /// http_only: true,
720 /// secure: true,
721 /// same_site: "Lax".to_string(),
722 /// }];
723 /// page.inject_cookies(&cookies).await?;
724 /// # Ok(())
725 /// # }
726 /// ```
727 pub async fn inject_cookies(&self, cookies: &[crate::session::SessionCookie]) -> Result<()> {
728 use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
729
730 for cookie in cookies {
731 let params = match SetCookieParams::builder()
732 .name(cookie.name.clone())
733 .value(cookie.value.clone())
734 .domain(cookie.domain.clone())
735 .path(cookie.path.clone())
736 .http_only(cookie.http_only)
737 .secure(cookie.secure)
738 .build()
739 {
740 Ok(p) => p,
741 Err(e) => {
742 warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
743 continue;
744 }
745 };
746
747 match timeout(self.cdp_timeout, self.page.execute(params)).await {
748 Err(_) => {
749 warn!(
750 cookie = %cookie.name,
751 timeout_ms = self.cdp_timeout.as_millis(),
752 "Timed out injecting cookie"
753 );
754 }
755 Ok(Err(e)) => {
756 warn!(cookie = %cookie.name, error = %e, "Failed to inject cookie");
757 }
758 Ok(Ok(_)) => {}
759 }
760 }
761
762 debug!(count = cookies.len(), "Cookies injected");
763 Ok(())
764 }
765
766 /// Capture a screenshot of the current page as PNG bytes.
767 ///
768 /// The screenshot is full-page by default (viewport clipped to the rendered
769 /// layout area). Save the returned bytes to a `.png` file or process
770 /// them in-memory.
771 ///
772 /// # Errors
773 ///
774 /// Returns [`BrowserError::CdpError`] if the CDP `Page.captureScreenshot`
775 /// command fails, or [`BrowserError::Timeout`] if it exceeds
776 /// `cdp_timeout`.
777 ///
778 /// # Example
779 ///
780 /// ```no_run
781 /// use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
782 /// use std::{time::Duration, fs};
783 ///
784 /// # async fn run() -> stygian_browser::error::Result<()> {
785 /// let pool = BrowserPool::new(BrowserConfig::default()).await?;
786 /// let handle = pool.acquire().await?;
787 /// let mut page = handle.browser().expect("valid browser").new_page().await?;
788 /// page.navigate("https://example.com", WaitUntil::Selector("body".to_string()), Duration::from_secs(30)).await?;
789 /// let png = page.screenshot().await?;
790 /// fs::write("screenshot.png", &png).unwrap();
791 /// # Ok(())
792 /// # }
793 /// ```
794 pub async fn screenshot(&self) -> Result<Vec<u8>> {
795 use chromiumoxide::page::ScreenshotParams;
796
797 let params = ScreenshotParams::builder().full_page(true).build();
798
799 timeout(self.cdp_timeout, self.page.screenshot(params))
800 .await
801 .map_err(|_| BrowserError::Timeout {
802 operation: "Page.captureScreenshot".to_string(),
803 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
804 })?
805 .map_err(|e| BrowserError::CdpError {
806 operation: "Page.captureScreenshot".to_string(),
807 message: e.to_string(),
808 })
809 }
810
811 /// Borrow the underlying chromiumoxide [`Page`].
812 pub const fn inner(&self) -> &Page {
813 &self.page
814 }
815
816 /// Close this page (tab).
817 ///
818 /// Called automatically on drop; explicit call avoids suppressing the error.
819 pub async fn close(self) -> Result<()> {
820 timeout(Duration::from_secs(5), self.page.clone().close())
821 .await
822 .map_err(|_| BrowserError::Timeout {
823 operation: "page.close".to_string(),
824 duration_ms: 5000,
825 })?
826 .map_err(|e| BrowserError::CdpError {
827 operation: "page.close".to_string(),
828 message: e.to_string(),
829 })
830 }
831}
832
833impl Drop for PageHandle {
834 fn drop(&mut self) {
835 warn!("PageHandle dropped without explicit close(); spawning cleanup task");
836 // chromiumoxide Page does not implement close on Drop, so we spawn
837 // a fire-and-forget task. The page ref is already owned; we need to
838 // swap it out. We clone the Page handle (it's Arc-backed internally).
839 let page = self.page.clone();
840 tokio::spawn(async move {
841 let _ = page.close().await;
842 });
843 }
844}
845
846// ─── Tests ────────────────────────────────────────────────────────────────────
847
#[cfg(test)]
mod tests {
    use super::*;

    /// The full preset blocks all four resource types and nothing else.
    #[test]
    fn resource_filter_block_media_blocks_image() {
        let filter = ResourceFilter::block_media();
        for blocked in ["Image", "Font", "Stylesheet", "Media"] {
            assert!(filter.should_block(blocked));
        }
        for allowed in ["Script", "XHR"] {
            assert!(!filter.should_block(allowed));
        }
    }

    /// Type names are matched without regard to ASCII case.
    #[test]
    fn resource_filter_case_insensitive() {
        let filter = ResourceFilter::block_images_and_fonts();
        let lowercase = filter.should_block("image");
        let uppercase = filter.should_block("IMAGE");
        assert!(lowercase);
        assert!(uppercase);
        assert!(!filter.should_block("Stylesheet"));
    }

    /// Chained `block` calls accumulate exactly the requested types.
    #[test]
    fn resource_filter_builder_chain() {
        let empty = ResourceFilter::default();
        let filter = empty.block(ResourceType::Image).block(ResourceType::Font);
        assert!(filter.should_block("Image"));
        assert!(filter.should_block("Font"));
        assert!(!filter.should_block("Stylesheet"));
    }

    /// Blocking the same type twice stores it only once.
    #[test]
    fn resource_filter_dedup_block() {
        let once = ResourceFilter::default().block(ResourceType::Image);
        let twice = once.block(ResourceType::Image);
        assert_eq!(twice.blocked.len(), 1);
    }

    /// Only the default filter is empty.
    #[test]
    fn resource_filter_is_empty_when_default() {
        let nothing_blocked = ResourceFilter::default();
        let everything_blocked = ResourceFilter::block_media();
        assert!(nothing_blocked.is_empty());
        assert!(!everything_blocked.is_empty());
    }

    /// The selector variant carries the string it was built with.
    #[test]
    fn wait_until_selector_stores_string() {
        let condition = WaitUntil::Selector("#foo".to_string());
        assert!(matches!(condition, WaitUntil::Selector(ref css) if css == "#foo"));
    }

    /// Every variant maps to its CDP name.
    #[test]
    fn resource_type_cdp_str() {
        let expected = [
            (ResourceType::Image, "Image"),
            (ResourceType::Font, "Font"),
            (ResourceType::Stylesheet, "Stylesheet"),
            (ResourceType::Media, "Media"),
        ];
        for (variant, name) in expected {
            assert_eq!(variant.as_cdp_str(), name);
        }
    }

    /// `PageHandle` must be `Send + Sync` for use across thread boundaries.
    #[test]
    fn page_handle_is_send_sync() {
        fn assert_send_and_sync<T: Send + Sync>() {}
        assert_send_and_sync::<PageHandle>();
    }

    /// The status-code sentinel (0 = "not yet captured") and the conversion to
    /// `Option<u16>` are pure-logic invariants testable without a live browser.
    #[test]
    fn status_code_sentinel_zero_maps_to_none() {
        use std::sync::atomic::{AtomicU16, Ordering};
        let cell = AtomicU16::new(0);
        let observed = match cell.load(Ordering::Acquire) {
            0 => None,
            code => Some(code),
        };
        assert_eq!(observed, None::<u16>);
    }

    /// Any non-zero stored value is reported back unchanged.
    #[test]
    fn status_code_non_zero_maps_to_some() {
        use std::sync::atomic::{AtomicU16, Ordering};
        for expected in [200u16, 301, 404, 503] {
            let cell = AtomicU16::new(expected);
            let observed = match cell.load(Ordering::Acquire) {
                0 => None,
                code => Some(code),
            };
            assert_eq!(observed, Some(expected));
        }
    }
}