1use std::collections::HashMap;
31use std::sync::{
32 Arc,
33 atomic::{AtomicU16, Ordering},
34};
35use std::time::Duration;
36
37use chromiumoxide::Page;
38use serde::{Deserialize, Serialize};
39use tokio::time::timeout;
40use tracing::{debug, warn};
41
42use crate::error::{BrowserError, Result};
43
/// Category of network resource that a resource filter can block.
///
/// The enum is fieldless, so it is `Copy` and `Hash` in addition to the
/// comparison traits; this lets callers pass it by value and use it as a
/// set/map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ResourceType {
    /// Resources reported with the CDP type string `"Image"`.
    Image,
    /// Resources reported with the CDP type string `"Font"`.
    Font,
    /// Resources reported with the CDP type string `"Stylesheet"`.
    Stylesheet,
    /// Resources reported with the CDP type string `"Media"`.
    Media,
}

impl ResourceType {
    /// Returns the CDP resource-type string this variant corresponds to.
    #[must_use]
    pub const fn as_cdp_str(&self) -> &'static str {
        match *self {
            Self::Image => "Image",
            Self::Font => "Font",
            Self::Stylesheet => "Stylesheet",
            Self::Media => "Media",
        }
    }
}
70
71#[derive(Debug, Clone, Default)]
82pub struct ResourceFilter {
83 blocked: Vec<ResourceType>,
84}
85
86impl ResourceFilter {
87 #[must_use]
89 pub fn block_media() -> Self {
90 Self {
91 blocked: vec![
92 ResourceType::Image,
93 ResourceType::Font,
94 ResourceType::Stylesheet,
95 ResourceType::Media,
96 ],
97 }
98 }
99
100 #[must_use]
101 pub fn block_images_and_fonts() -> Self {
102 Self {
103 blocked: vec![ResourceType::Image, ResourceType::Font],
104 }
105 }
106
107 #[must_use]
108 pub fn block(mut self, resource: ResourceType) -> Self {
109 if !self.blocked.contains(&resource) {
110 self.blocked.push(resource);
111 }
112 self
113 }
114
115 #[must_use]
116 pub fn should_block(&self, cdp_type: &str) -> bool {
117 self.blocked
118 .iter()
119 .any(|r| r.as_cdp_str().eq_ignore_ascii_case(cdp_type))
120 }
121
122 #[must_use]
123 pub const fn is_empty(&self) -> bool {
124 self.blocked.is_empty()
125 }
126}
127
/// Condition a navigation waits for before it is considered complete.
///
/// Derives `PartialEq`/`Eq` so conditions can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WaitUntil {
    /// Wait for the page's DOM-content-loaded event.
    DomContentLoaded,
    /// Wait for the load event, then for network activity to settle.
    NetworkIdle,
    /// Wait until an element matching the given CSS selector can be found.
    Selector(String),
}
145
146#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
176pub enum OuterHtmlStrategy {
177 #[default]
179 Current,
180 Recursive,
183}
184
185impl OuterHtmlStrategy {
186 #[must_use]
188 pub const fn as_str(&self) -> &'static str {
189 match self {
190 Self::Current => "Current",
191 Self::Recursive => "Recursive",
192 }
193 }
194
195 #[must_use]
198 pub const fn all() -> [Self; 2] {
199 [Self::Current, Self::Recursive]
200 }
201}
202
203impl std::fmt::Display for OuterHtmlStrategy {
204 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
205 f.write_str(self.as_str())
206 }
207}
208
209#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
221pub enum OuterHtmlResult {
222 Empty,
226 Content(String),
228 Failed {
232 backends: Vec<&'static str>,
234 },
235}
236
237impl OuterHtmlResult {
238 #[must_use]
241 pub const fn content(&self) -> Option<&str> {
242 match self {
243 Self::Content(s) => Some(s.as_str()),
244 Self::Empty | Self::Failed { .. } => None,
245 }
246 }
247
248 #[must_use]
251 pub const fn is_empty(&self) -> bool {
252 match self {
253 Self::Content(s) => s.is_empty(),
254 Self::Empty | Self::Failed { .. } => true,
255 }
256 }
257}
258
259impl std::fmt::Display for OuterHtmlResult {
260 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
261 match self {
262 Self::Empty => f.write_str("Empty"),
263 Self::Content(s) => write!(f, "Content({} bytes)", s.len()),
264 Self::Failed { backends } => write!(f, "Failed({})", backends.join(", ")),
265 }
266 }
267}
268
269pub struct NodeHandle {
300 element: chromiumoxide::element::Element,
301 selector: Arc<str>,
304 cdp_timeout: Duration,
305 page: chromiumoxide::Page,
307}
308
309impl NodeHandle {
310 pub async fn attr(&self, name: &str) -> Result<Option<String>> {
319 timeout(self.cdp_timeout, self.element.attribute(name))
320 .await
321 .map_err(|_| BrowserError::Timeout {
322 operation: "NodeHandle::attr".to_string(),
323 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
324 })?
325 .map_err(|e| self.cdp_err_or_stale(&e, "attr"))
326 }
327
328 pub async fn attr_map(&self) -> Result<HashMap<String, String>> {
339 let flat = timeout(self.cdp_timeout, self.element.attributes())
340 .await
341 .map_err(|_| BrowserError::Timeout {
342 operation: "NodeHandle::attr_map".to_string(),
343 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
344 })?
345 .map_err(|e| self.cdp_err_or_stale(&e, "attr_map"))?;
346
347 let mut map = HashMap::with_capacity(flat.len() / 2);
348 for pair in flat.chunks_exact(2) {
349 if let [name, value] = pair {
350 map.insert(name.clone(), value.clone());
351 }
352 }
353 Ok(map)
354 }
355
356 pub async fn text_content(&self) -> Result<String> {
367 let returns = timeout(
368 self.cdp_timeout,
369 self.element
370 .call_js_fn(r"function() { return this.textContent ?? ''; }", true),
371 )
372 .await
373 .map_err(|_| BrowserError::Timeout {
374 operation: "NodeHandle::text_content".to_string(),
375 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
376 })?
377 .map_err(|e| self.cdp_err_or_stale(&e, "text_content"))?;
378
379 Ok(returns
380 .result
381 .value
382 .as_ref()
383 .and_then(|v| v.as_str())
384 .unwrap_or("")
385 .to_string())
386 }
387
388 pub async fn inner_html(&self) -> Result<String> {
395 timeout(self.cdp_timeout, self.element.inner_html())
396 .await
397 .map_err(|_| BrowserError::Timeout {
398 operation: "NodeHandle::inner_html".to_string(),
399 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
400 })?
401 .map_err(|e| self.cdp_err_or_stale(&e, "inner_html"))
402 .map(Option::unwrap_or_default)
403 }
404
405 pub async fn outer_html(&self) -> Result<String> {
437 match self
438 .outer_html_with_strategy(OuterHtmlStrategy::Current)
439 .await?
440 {
441 OuterHtmlResult::Content(s) => Ok(s),
442 OuterHtmlResult::Empty | OuterHtmlResult::Failed { .. } => Ok(String::new()),
443 }
444 }
445
446 pub async fn outer_html_with_strategy(
495 &self,
496 strategy: OuterHtmlStrategy,
497 ) -> Result<OuterHtmlResult> {
498 match strategy {
499 OuterHtmlStrategy::Current => self.outer_html_current().await,
500 OuterHtmlStrategy::Recursive => self.outer_html_recursive().await,
501 }
502 }
503
504 async fn outer_html_current(&self) -> Result<OuterHtmlResult> {
506 let primary = timeout(self.cdp_timeout, self.element.outer_html())
507 .await
508 .map_err(|_| BrowserError::Timeout {
509 operation: "NodeHandle::outer_html_with_strategy(Current)".to_string(),
510 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
511 })?
512 .map_err(|e| self.cdp_err_or_stale(&e, "outer_html_current"))?;
513
514 if let Some(html) = primary
515 && !html.trim().is_empty()
516 {
517 return Ok(OuterHtmlResult::Content(html));
518 }
519
520 let fallback_html = self.outer_html_via_js().await?;
521 if !fallback_html.trim().is_empty() {
522 return Ok(OuterHtmlResult::Content(fallback_html));
523 }
524
525 Ok(OuterHtmlResult::Empty)
526 }
527
528 async fn outer_html_recursive(&self) -> Result<OuterHtmlResult> {
534 use chromiumoxide::cdp::browser_protocol::dom::{GetOuterHtmlParams, GetOuterHtmlReturns};
535 use chromiumoxide::types::CommandResponse;
536
537 let mut failed_backends: Vec<&'static str> = Vec::new();
538
539 let primary = timeout(
540 self.cdp_timeout,
541 self.page.execute(
542 GetOuterHtmlParams::builder()
543 .node_id(self.element.node_id)
544 .build(),
545 ),
546 )
547 .await
548 .map_err(|_| BrowserError::Timeout {
549 operation: "NodeHandle::outer_html_with_strategy(Recursive)".to_string(),
550 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
551 })?
552 .map_err(|e| self.cdp_err_or_stale(&e, "outer_html_recursive::DOM.getOuterHTML"));
553
554 match primary {
555 Ok(CommandResponse {
556 result: GetOuterHtmlReturns { outer_html },
557 ..
558 }) if !outer_html.trim().is_empty() => {
559 return Ok(OuterHtmlResult::Content(outer_html));
560 }
561 Ok(CommandResponse {
562 result: GetOuterHtmlReturns { outer_html },
563 ..
564 }) => {
565 debug!(
566 selector = %self.selector,
567 bytes = outer_html.len(),
568 "DOM.getOuterHTML returned empty payload; falling back to DOM.describeNode walk"
569 );
570 }
571 Err(e) => {
572 failed_backends.push("DOM.getOuterHTML");
573 debug!(
574 selector = %self.selector,
575 error = %e,
576 "DOM.getOuterHTML failed; falling back to DOM.describeNode walk"
577 );
578 }
579 }
580
581 match self.outer_html_via_rust_walk().await {
582 Ok(html) if !html.trim().is_empty() => Ok(OuterHtmlResult::Content(html)),
583 Ok(_) => {
584 if failed_backends.is_empty() {
585 Ok(OuterHtmlResult::Empty)
588 } else {
589 Ok(OuterHtmlResult::Failed {
594 backends: failed_backends,
595 })
596 }
597 }
598 Err(e) => {
599 failed_backends.push("DOM.describeNode-walk");
600 debug!(
601 selector = %self.selector,
602 error = %e,
603 "Rust-side DOM.describeNode walk failed"
604 );
605 Ok(OuterHtmlResult::Failed {
606 backends: failed_backends,
607 })
608 }
609 }
610 }
611
612 async fn outer_html_via_rust_walk(&self) -> Result<String> {
616 use chromiumoxide::cdp::browser_protocol::dom::DescribeNodeParams;
617 use chromiumoxide::types::CommandResponse;
618
619 let described: CommandResponse<
620 chromiumoxide::cdp::browser_protocol::dom::DescribeNodeReturns,
621 > = timeout(
622 self.cdp_timeout,
623 self.page.execute(
624 DescribeNodeParams::builder()
625 .node_id(self.element.node_id)
626 .depth(-1)
627 .build(),
628 ),
629 )
630 .await
631 .map_err(|_| BrowserError::Timeout {
632 operation: "NodeHandle::outer_html_via_rust_walk".to_string(),
633 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
634 })?
635 .map_err(|e| self.cdp_err_or_stale(&e, "outer_html_via_rust_walk"))?;
636
637 Ok(serialize_node_tree(&described.node))
638 }
639
640 async fn outer_html_via_js(&self) -> Result<String> {
641 let returns = timeout(
642 self.cdp_timeout,
643 self.element.call_js_fn(
644 r"function() {
645 if (typeof this.outerHTML === 'string' && this.outerHTML.length > 0) {
646 return this.outerHTML;
647 }
648 try {
649 return new XMLSerializer().serializeToString(this);
650 } catch (_) {
651 return '';
652 }
653 }",
654 true,
655 ),
656 )
657 .await
658 .map_err(|_| BrowserError::Timeout {
659 operation: "NodeHandle::outer_html_via_js".to_string(),
660 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
661 })?
662 .map_err(|e| self.cdp_err_or_stale(&e, "outer_html_via_js"))?;
663
664 Ok(returns
665 .result
666 .value
667 .as_ref()
668 .and_then(serde_json::Value::as_str)
669 .unwrap_or_default()
670 .to_string())
671 }
672
673 pub async fn ancestors(&self) -> Result<Vec<String>> {
685 let returns = timeout(
686 self.cdp_timeout,
687 self.element.call_js_fn(
688 r"function() {
689 const a = [];
690 let n = this.parentElement;
691 while (n) { a.push(n.tagName.toLowerCase()); n = n.parentElement; }
692 return a;
693 }",
694 true,
695 ),
696 )
697 .await
698 .map_err(|_| BrowserError::Timeout {
699 operation: "NodeHandle::ancestors".to_string(),
700 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
701 })?
702 .map_err(|e| self.cdp_err_or_stale(&e, "ancestors"))?;
703
704 let arr = returns
708 .result
709 .value
710 .as_ref()
711 .and_then(|v| v.as_array())
712 .ok_or_else(|| BrowserError::ScriptExecutionFailed {
713 script: "NodeHandle::ancestors".to_string(),
714 reason: "CDP returned no value or a non-array value for ancestors()".to_string(),
715 })?;
716
717 arr.iter()
718 .map(|v| {
719 v.as_str().map(ToString::to_string).ok_or_else(|| {
720 BrowserError::ScriptExecutionFailed {
721 script: "NodeHandle::ancestors".to_string(),
722 reason: format!("ancestor entry is not a string: {v}"),
723 }
724 })
725 })
726 .collect()
727 }
728
729 pub async fn children_matching(&self, selector: &str) -> Result<Vec<Self>> {
736 let elements = timeout(self.cdp_timeout, self.element.find_elements(selector))
737 .await
738 .map_err(|_| BrowserError::Timeout {
739 operation: "NodeHandle::children_matching".to_string(),
740 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
741 })?
742 .map_err(|e| self.cdp_err_or_stale(&e, "children_matching"))?;
743
744 let selector_arc: Arc<str> = Arc::from(selector);
745 Ok(elements
746 .into_iter()
747 .map(|el| Self {
748 element: el,
749 selector: selector_arc.clone(),
750 cdp_timeout: self.cdp_timeout,
751 page: self.page.clone(),
752 })
753 .collect())
754 }
755
756 pub async fn parent(&self) -> Result<Option<Self>> {
787 let attr = format!(
788 "data-stygian-t-{}",
789 ulid::Ulid::new().to_string().to_lowercase()
790 );
791 let js = format!(
792 "function() {{ \
793 var t = this.parentElement; \
794 if (!t) {{ return false; }} \
795 t.setAttribute('{attr}', '1'); \
796 return true; \
797 }}"
798 );
799 self.call_traversal(&js, &attr, "parent").await
800 }
801
802 pub async fn next_sibling(&self) -> Result<Option<Self>> {
830 let attr = format!(
831 "data-stygian-t-{}",
832 ulid::Ulid::new().to_string().to_lowercase()
833 );
834 let js = format!(
835 "function() {{ \
836 var t = this.nextElementSibling; \
837 if (!t) {{ return false; }} \
838 t.setAttribute('{attr}', '1'); \
839 return true; \
840 }}"
841 );
842 self.call_traversal(&js, &attr, "next").await
843 }
844
845 pub async fn previous_sibling(&self) -> Result<Option<Self>> {
873 let attr = format!(
874 "data-stygian-t-{}",
875 ulid::Ulid::new().to_string().to_lowercase()
876 );
877 let js = format!(
878 "function() {{ \
879 var t = this.previousElementSibling; \
880 if (!t) {{ return false; }} \
881 t.setAttribute('{attr}', '1'); \
882 return true; \
883 }}"
884 );
885 self.call_traversal(&js, &attr, "prev").await
886 }
887
888 async fn call_traversal(
906 &self,
907 js_fn: &str,
908 attr_name: &str,
909 selector_suffix: &str,
910 ) -> Result<Option<Self>> {
911 let op_tag = format!("NodeHandle::{selector_suffix}::tag");
913 let returns = timeout(self.cdp_timeout, self.element.call_js_fn(js_fn, false))
914 .await
915 .map_err(|_| BrowserError::Timeout {
916 operation: op_tag.clone(),
917 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
918 })?
919 .map_err(|e| self.cdp_err_or_stale(&e, selector_suffix))?;
920
921 let has_target = returns
923 .result
924 .value
925 .as_ref()
926 .and_then(serde_json::Value::as_bool)
927 .unwrap_or(false);
928 if !has_target {
929 return Ok(None);
930 }
931
932 let css = format!("[{attr_name}]");
933 let op_resolve = format!("NodeHandle::{selector_suffix}::resolve");
934 let element = timeout(self.cdp_timeout, self.page.find_element(css))
935 .await
936 .map_err(|_| BrowserError::Timeout {
937 operation: op_resolve.clone(),
938 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
939 })?
940 .map_err(|e| BrowserError::CdpError {
941 operation: op_resolve,
942 message: format!("{e:?}"),
943 })?;
944
945 let cleanup = format!("function() {{ this.removeAttribute('{attr_name}'); }}");
947 let _ = element.call_js_fn(cleanup, false).await;
948
949 let new_selector: Arc<str> =
950 Arc::from(format!("{}::{selector_suffix}", self.selector).as_str());
951 Ok(Some(Self {
952 element,
953 selector: new_selector,
954 cdp_timeout: self.cdp_timeout,
955 page: self.page.clone(),
956 }))
957 }
958
959 fn cdp_err_or_stale(
961 &self,
962 err: &chromiumoxide::error::CdpError,
963 operation: &str,
964 ) -> BrowserError {
965 let msg = format!("{err:?}");
966 if msg.contains("Cannot find object with id")
967 || msg.contains("context with specified id")
968 || msg.contains("Cannot find context")
969 {
970 BrowserError::StaleNode {
971 selector: self.selector.to_string(),
972 }
973 } else {
974 BrowserError::CdpError {
975 operation: operation.to_string(),
976 message: msg,
977 }
978 }
979 }
980}
981
982pub struct PageHandle {
1005 page: Page,
1006 cdp_timeout: Duration,
1007 last_status_code: Arc<AtomicU16>,
1009 resource_filter_task: Option<tokio::task::JoinHandle<()>>,
1012}
1013
1014impl PageHandle {
1015 pub(crate) fn new(page: Page, cdp_timeout: Duration) -> Self {
1017 Self {
1018 page,
1019 cdp_timeout,
1020 last_status_code: Arc::new(AtomicU16::new(0)),
1021 resource_filter_task: None,
1022 }
1023 }
1024
1025 pub async fn navigate(
1030 &mut self,
1031 url: &str,
1032 condition: WaitUntil,
1033 nav_timeout: Duration,
1034 ) -> Result<()> {
1035 self.setup_status_capture().await;
1036 timeout(
1037 nav_timeout,
1038 self.navigate_inner(url, condition, nav_timeout),
1039 )
1040 .await
1041 .map_err(|_| BrowserError::NavigationFailed {
1042 url: url.to_string(),
1043 reason: format!("navigation timed out after {nav_timeout:?}"),
1044 })?
1045 }
1046
1047 async fn setup_status_capture(&self) {
1050 use chromiumoxide::cdp::browser_protocol::network::{
1051 EventResponseReceived, ResourceType as NetworkResourceType,
1052 };
1053 use futures::StreamExt;
1054
1055 self.last_status_code.store(0, Ordering::Release);
1057
1058 let page_for_listener = self.page.clone();
1059 let status_capture = Arc::clone(&self.last_status_code);
1060 match page_for_listener
1061 .event_listener::<EventResponseReceived>()
1062 .await
1063 {
1064 Ok(mut stream) => {
1065 tokio::spawn(async move {
1066 while let Some(event) = stream.next().await {
1067 if event.r#type == NetworkResourceType::Document {
1068 let code = u16::try_from(event.response.status).unwrap_or(0);
1069 if code > 0 {
1070 status_capture.store(code, Ordering::Release);
1071 }
1072 break;
1073 }
1074 }
1075 });
1076 }
1077 Err(e) => warn!("status-code capture unavailable: {e}"),
1078 }
1079 }
1080
1081 async fn navigate_inner(
1083 &self,
1084 url: &str,
1085 condition: WaitUntil,
1086 nav_timeout: Duration,
1087 ) -> Result<()> {
1088 use chromiumoxide::cdp::browser_protocol::page::{
1089 EventDomContentEventFired, EventLoadEventFired,
1090 };
1091 use futures::StreamExt;
1092
1093 let url_owned = url.to_string();
1094
1095 let mut dom_events = match &condition {
1096 WaitUntil::DomContentLoaded => Some(
1097 self.page
1098 .event_listener::<EventDomContentEventFired>()
1099 .await
1100 .map_err(|e| BrowserError::NavigationFailed {
1101 url: url_owned.clone(),
1102 reason: format!("{e:?}"),
1103 })?,
1104 ),
1105 _ => None,
1106 };
1107
1108 let mut load_events = match &condition {
1109 WaitUntil::NetworkIdle => Some(
1110 self.page
1111 .event_listener::<EventLoadEventFired>()
1112 .await
1113 .map_err(|e| BrowserError::NavigationFailed {
1114 url: url_owned.clone(),
1115 reason: e.to_string(),
1116 })?,
1117 ),
1118 _ => None,
1119 };
1120
1121 let inflight = if matches!(condition, WaitUntil::NetworkIdle) {
1122 Some(self.subscribe_inflight_counter().await)
1123 } else {
1124 None
1125 };
1126
1127 self.page
1128 .goto(url)
1129 .await
1130 .map_err(|e| BrowserError::NavigationFailed {
1131 url: url_owned.clone(),
1132 reason: e.to_string(),
1133 })?;
1134
1135 match &condition {
1136 WaitUntil::DomContentLoaded => {
1137 if let Some(ref mut events) = dom_events {
1138 let _ = events.next().await;
1139 }
1140 }
1141 WaitUntil::NetworkIdle => {
1142 if let Some(ref mut events) = load_events {
1143 let _ = events.next().await;
1144 }
1145 if let Some(ref counter) = inflight {
1146 Self::wait_network_idle(counter).await;
1147 }
1148 }
1149 WaitUntil::Selector(css) => {
1150 self.wait_for_selector(css, nav_timeout).await?;
1151 }
1152 }
1153 Ok(())
1154 }
1155
1156 async fn subscribe_inflight_counter(&self) -> Arc<std::sync::atomic::AtomicI32> {
1160 use std::sync::atomic::AtomicI32;
1161
1162 use chromiumoxide::cdp::browser_protocol::network::{
1163 EventLoadingFailed, EventLoadingFinished, EventRequestWillBeSent,
1164 };
1165 use futures::StreamExt;
1166
1167 let counter: Arc<AtomicI32> = Arc::new(AtomicI32::new(0));
1168 let pairs: [(Arc<AtomicI32>, i32); 3] = [
1169 (Arc::clone(&counter), 1),
1170 (Arc::clone(&counter), -1),
1171 (Arc::clone(&counter), -1),
1172 ];
1173 let [p1, p2, p3] = [self.page.clone(), self.page.clone(), self.page.clone()];
1174
1175 macro_rules! spawn_tracker {
1176 ($page:expr, $event:ty, $c:expr, $delta:expr) => {
1177 match $page.event_listener::<$event>().await {
1178 Ok(mut s) => {
1179 let c = $c;
1180 let d = $delta;
1181 tokio::spawn(async move {
1182 while s.next().await.is_some() {
1183 c.fetch_add(d, Ordering::Relaxed);
1184 }
1185 });
1186 }
1187 Err(e) => warn!("network-idle tracker unavailable: {e}"),
1188 }
1189 };
1190 }
1191
1192 let [(c1, d1), (c2, d2), (c3, d3)] = pairs;
1193 spawn_tracker!(p1, EventRequestWillBeSent, c1, d1);
1194 spawn_tracker!(p2, EventLoadingFinished, c2, d2);
1195 spawn_tracker!(p3, EventLoadingFailed, c3, d3);
1196
1197 counter
1198 }
1199
1200 async fn wait_network_idle(counter: &Arc<std::sync::atomic::AtomicI32>) {
1201 const IDLE_THRESHOLD: i32 = 2;
1202 const SETTLE: Duration = Duration::from_millis(500);
1203 loop {
1204 if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
1205 tokio::time::sleep(SETTLE).await;
1206 if counter.load(Ordering::Relaxed) <= IDLE_THRESHOLD {
1207 break;
1208 }
1209 } else {
1210 tokio::time::sleep(Duration::from_millis(50)).await;
1211 }
1212 }
1213 }
1214
1215 pub async fn wait_for_selector(&self, selector: &str, wait_timeout: Duration) -> Result<()> {
1220 let selector_owned = selector.to_string();
1221 let poll = async {
1222 loop {
1223 if self.page.find_element(selector_owned.clone()).await.is_ok() {
1224 return Ok(());
1225 }
1226 tokio::time::sleep(Duration::from_millis(100)).await;
1227 }
1228 };
1229
1230 timeout(wait_timeout, poll)
1231 .await
1232 .map_err(|_| BrowserError::NavigationFailed {
1233 url: String::new(),
1234 reason: format!("selector '{selector_owned}' not found within {wait_timeout:?}"),
1235 })?
1236 }
1237
1238 pub async fn set_resource_filter(&mut self, filter: ResourceFilter) -> Result<()> {
1246 use chromiumoxide::cdp::browser_protocol::fetch::{
1247 ContinueRequestParams, EnableParams, EventRequestPaused, FailRequestParams,
1248 RequestPattern,
1249 };
1250 use chromiumoxide::cdp::browser_protocol::network::ErrorReason;
1251 use futures::StreamExt as _;
1252
1253 if filter.is_empty() {
1254 return Ok(());
1255 }
1256
1257 if let Some(task) = self.resource_filter_task.take() {
1259 task.abort();
1260 }
1261
1262 let pattern = RequestPattern::builder().url_pattern("*").build();
1263 let params = EnableParams::builder()
1264 .patterns(vec![pattern])
1265 .handle_auth_requests(false)
1266 .build();
1267
1268 timeout(self.cdp_timeout, self.page.execute::<EnableParams>(params))
1269 .await
1270 .map_err(|_| BrowserError::Timeout {
1271 operation: "Fetch.enable".to_string(),
1272 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1273 })?
1274 .map_err(|e| BrowserError::CdpError {
1275 operation: "Fetch.enable".to_string(),
1276 message: e.to_string(),
1277 })?;
1278
1279 let mut events = self
1282 .page
1283 .event_listener::<EventRequestPaused>()
1284 .await
1285 .map_err(|e| BrowserError::CdpError {
1286 operation: "Fetch.requestPaused subscribe".to_string(),
1287 message: e.to_string(),
1288 })?;
1289
1290 let page = self.page.clone();
1291 debug!("Resource filter active: {:?}", filter);
1292 let task = tokio::spawn(async move {
1293 while let Some(event) = events.next().await {
1294 let request_id = event.request_id.clone();
1295 if filter.should_block(event.resource_type.as_ref()) {
1296 let params = FailRequestParams::new(request_id, ErrorReason::BlockedByClient);
1297 let _ = page.execute(params).await;
1298 } else {
1299 let _ = page.execute(ContinueRequestParams::new(request_id)).await;
1300 }
1301 }
1302 });
1303
1304 self.resource_filter_task = Some(task);
1305 Ok(())
1306 }
1307
1308 pub async fn url(&self) -> Result<String> {
1335 timeout(self.cdp_timeout, self.page.url())
1336 .await
1337 .map_err(|_| BrowserError::Timeout {
1338 operation: "page.url".to_string(),
1339 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1340 })?
1341 .map_err(|e| BrowserError::CdpError {
1342 operation: "page.url".to_string(),
1343 message: e.to_string(),
1344 })
1345 .map(Option::unwrap_or_default)
1346 }
1347
1348 pub fn status_code(&self) -> Result<Option<u16>> {
1379 let code = self.last_status_code.load(Ordering::Acquire);
1380 Ok(if code == 0 { None } else { Some(code) })
1381 }
1382
1383 pub async fn title(&self) -> Result<String> {
1388 timeout(self.cdp_timeout, self.page.get_title())
1389 .await
1390 .map_err(|_| BrowserError::Timeout {
1391 operation: "get_title".to_string(),
1392 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1393 })?
1394 .map_err(|e| BrowserError::ScriptExecutionFailed {
1395 script: "document.title".to_string(),
1396 reason: e.to_string(),
1397 })
1398 .map(Option::unwrap_or_default)
1399 }
1400
1401 pub async fn content(&self) -> Result<String> {
1406 timeout(self.cdp_timeout, self.page.content())
1407 .await
1408 .map_err(|_| BrowserError::Timeout {
1409 operation: "page.content".to_string(),
1410 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1411 })?
1412 .map_err(|e| BrowserError::ScriptExecutionFailed {
1413 script: "document.documentElement.outerHTML".to_string(),
1414 reason: e.to_string(),
1415 })
1416 }
1417
1418 pub async fn query_selector_all(&self, selector: &str) -> Result<Vec<NodeHandle>> {
1450 let elements = timeout(self.cdp_timeout, self.page.find_elements(selector))
1451 .await
1452 .map_err(|_| BrowserError::Timeout {
1453 operation: "PageHandle::query_selector_all".to_string(),
1454 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1455 })?
1456 .map_err(|e| BrowserError::CdpError {
1457 operation: "PageHandle::query_selector_all".to_string(),
1458 message: e.to_string(),
1459 })?;
1460
1461 let selector_arc: Arc<str> = Arc::from(selector);
1462 Ok(elements
1463 .into_iter()
1464 .map(|el| NodeHandle {
1465 element: el,
1466 selector: selector_arc.clone(),
1467 cdp_timeout: self.cdp_timeout,
1468 page: self.page.clone(),
1469 })
1470 .collect())
1471 }
1472
1473 pub async fn eval<T: serde::de::DeserializeOwned>(&self, script: &str) -> Result<T> {
1479 let script_owned = script.to_string();
1480 timeout(self.cdp_timeout, self.page.evaluate(script))
1481 .await
1482 .map_err(|_| BrowserError::Timeout {
1483 operation: "page.evaluate".to_string(),
1484 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1485 })?
1486 .map_err(|e| BrowserError::ScriptExecutionFailed {
1487 script: script_owned.clone(),
1488 reason: e.to_string(),
1489 })?
1490 .into_value::<T>()
1491 .map_err(|e| BrowserError::ScriptExecutionFailed {
1492 script: script_owned,
1493 reason: e.to_string(),
1494 })
1495 }
1496
1497 pub async fn save_cookies(
1501 &self,
1502 ) -> Result<Vec<chromiumoxide::cdp::browser_protocol::network::Cookie>> {
1503 use chromiumoxide::cdp::browser_protocol::network::GetCookiesParams;
1504
1505 let url = self
1506 .page
1507 .url()
1508 .await
1509 .map_err(|e| BrowserError::CdpError {
1510 operation: "page.url".to_string(),
1511 message: e.to_string(),
1512 })?
1513 .unwrap_or_default();
1514
1515 timeout(
1516 self.cdp_timeout,
1517 self.page
1518 .execute(GetCookiesParams::builder().urls(vec![url]).build()),
1519 )
1520 .await
1521 .map_err(|_| BrowserError::Timeout {
1522 operation: "Network.getCookies".to_string(),
1523 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1524 })?
1525 .map_err(|e| BrowserError::CdpError {
1526 operation: "Network.getCookies".to_string(),
1527 message: e.to_string(),
1528 })
1529 .map(|r| r.cookies.clone())
1530 }
1531
1532 pub async fn inject_cookies(&self, cookies: &[crate::session::SessionCookie]) -> Result<()> {
1569 use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
1570
1571 for cookie in cookies {
1572 let params = match SetCookieParams::builder()
1573 .name(cookie.name.clone())
1574 .value(cookie.value.clone())
1575 .domain(cookie.domain.clone())
1576 .path(cookie.path.clone())
1577 .http_only(cookie.http_only)
1578 .secure(cookie.secure)
1579 .build()
1580 {
1581 Ok(p) => p,
1582 Err(e) => {
1583 warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
1584 continue;
1585 }
1586 };
1587
1588 match timeout(self.cdp_timeout, self.page.execute(params)).await {
1589 Err(_) => {
1590 warn!(
1591 cookie = %cookie.name,
1592 timeout_ms = self.cdp_timeout.as_millis(),
1593 "Timed out injecting cookie"
1594 );
1595 }
1596 Ok(Err(e)) => {
1597 warn!(cookie = %cookie.name, error = %e, "Failed to inject cookie");
1598 }
1599 Ok(Ok(_)) => {}
1600 }
1601 }
1602
1603 debug!(count = cookies.len(), "Cookies injected");
1604 Ok(())
1605 }
1606
1607 pub async fn screenshot(&self) -> Result<Vec<u8>> {
1632 use chromiumoxide::page::ScreenshotParams;
1633
1634 let params = ScreenshotParams::builder().full_page(true).build();
1635
1636 timeout(self.cdp_timeout, self.page.screenshot(params))
1637 .await
1638 .map_err(|_| BrowserError::Timeout {
1639 operation: "Page.captureScreenshot".to_string(),
1640 duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
1641 })?
1642 .map_err(|e| BrowserError::CdpError {
1643 operation: "Page.captureScreenshot".to_string(),
1644 message: e.to_string(),
1645 })
1646 }
1647
1648 #[must_use]
1650 pub const fn inner(&self) -> &Page {
1651 &self.page
1652 }
1653
1654 pub async fn close(self) -> Result<()> {
1663 timeout(Duration::from_secs(5), self.page.clone().close())
1664 .await
1665 .map_err(|_| BrowserError::Timeout {
1666 operation: "page.close".to_string(),
1667 duration_ms: 5000,
1668 })?
1669 .map_err(|e| BrowserError::CdpError {
1670 operation: "page.close".to_string(),
1671 message: e.to_string(),
1672 })
1673 }
1674}
1675
1676#[cfg(feature = "stealth")]
1679impl PageHandle {
1680 pub async fn verify_stealth(&self) -> Result<crate::diagnostic::DiagnosticReport> {
1715 use crate::diagnostic::{CheckResult, DiagnosticReport, all_checks, all_limitation_probes};
1716
1717 let mut results: Vec<CheckResult> = Vec::new();
1718 let mut known_limitations = Vec::new();
1719
1720 for check in all_checks() {
1721 let result = match self.eval::<String>(check.script).await {
1722 Ok(json) => check.parse_output(&json),
1723 Err(e) => {
1724 tracing::warn!(
1725 check = ?check.id,
1726 error = %e,
1727 "stealth check script failed during evaluation"
1728 );
1729 CheckResult {
1730 id: check.id,
1731 description: check.description.to_string(),
1732 passed: false,
1733 details: format!("script error: {e}"),
1734 }
1735 }
1736 };
1737 tracing::debug!(
1738 check = ?result.id,
1739 passed = result.passed,
1740 details = %result.details,
1741 "stealth check result"
1742 );
1743 results.push(result);
1744 }
1745
1746 for probe in all_limitation_probes() {
1747 let limitation = match self.eval::<String>(probe.script).await {
1748 Ok(json) => probe.parse_output(&json),
1749 Err(error) => Some(crate::diagnostic::KnownLimitation {
1750 id: probe.id,
1751 description: probe.description.to_string(),
1752 details: format!("script error: {error}"),
1753 }),
1754 };
1755 if let Some(limitation) = limitation {
1756 tracing::debug!(
1757 limitation = ?limitation.id,
1758 details = %limitation.details,
1759 "stealth limitation observed"
1760 );
1761 known_limitations.push(limitation);
1762 }
1763 }
1764
1765 Ok(DiagnosticReport::new(results).with_known_limitations(known_limitations))
1766 }
1767
1768 pub async fn verify_stealth_with_transport(
1779 &self,
1780 observed: Option<crate::diagnostic::TransportObservations>,
1781 ) -> Result<crate::diagnostic::DiagnosticReport> {
1782 let report = self.verify_stealth().await?;
1783
1784 let user_agent = match self.eval::<String>("navigator.userAgent").await {
1785 Ok(ua) => ua,
1786 Err(e) => {
1787 tracing::warn!(error = %e, "failed to read navigator.userAgent for transport diagnostics");
1788 String::new()
1789 }
1790 };
1791
1792 let transport = crate::diagnostic::TransportDiagnostic::from_user_agent_and_observations(
1793 &user_agent,
1794 observed.as_ref(),
1795 );
1796
1797 Ok(report.with_transport(transport))
1798 }
1799}
1800
#[cfg(feature = "extract")]
impl PageHandle {
    /// Extracts one `T` from every node matching `selector`.
    ///
    /// All per-node extractions are driven together through `try_join_all`.
    ///
    /// # Errors
    ///
    /// Propagates the error from the selector query, or returns
    /// [`BrowserError::ExtractionFailed`] when an extraction fails.
    pub async fn extract_all<T>(&self, selector: &str) -> Result<Vec<T>>
    where
        T: crate::extract::Extractable,
    {
        use futures::future::try_join_all;

        let matched = self.query_selector_all(selector).await?;
        let pending = matched.iter().map(|node| T::extract_from(node));
        match try_join_all(pending).await {
            Ok(items) => Ok(items),
            Err(source) => Err(BrowserError::ExtractionFailed(source)),
        }
    }

    /// Tries each selector in order and extracts from the first one that
    /// matches at least one node.
    ///
    /// Returns an empty vector when no selector matches anything.
    ///
    /// # Errors
    ///
    /// Propagates the error from any selector query, or returns
    /// [`BrowserError::ExtractionFailed`] when an extraction fails for the
    /// selector that matched.
    pub async fn extract_all_with_fallback<T>(&self, selectors: &[&str]) -> Result<Vec<T>>
    where
        T: crate::extract::Extractable,
    {
        use futures::future::try_join_all;

        for selector in selectors.iter().copied() {
            let matched = self.query_selector_all(selector).await?;
            if !matched.is_empty() {
                // First selector with hits wins; later selectors are never queried.
                let pending = matched.iter().map(|node| T::extract_from(node));
                return try_join_all(pending)
                    .await
                    .map_err(BrowserError::ExtractionFailed);
            }
        }

        Ok(Vec::new())
    }

    /// Extracts a `T` from each node matching `selector`, one node at a time,
    /// skipping nodes whose extraction reports a missing required field.
    ///
    /// # Errors
    ///
    /// Propagates the error from the selector query. Any extraction error
    /// other than `ExtractionError::Missing` aborts the whole call with
    /// [`BrowserError::ExtractionFailed`].
    pub async fn extract_resilient<T>(&self, selector: &str) -> Result<Vec<T>>
    where
        T: crate::extract::Extractable,
    {
        use crate::extract::ExtractionError;

        let matched = self.query_selector_all(selector).await?;
        let mut extracted = Vec::with_capacity(matched.len());

        for node in matched.iter() {
            match T::extract_from(node).await {
                Err(ExtractionError::Missing { .. }) => {
                    tracing::debug!(
                        selector,
                        "extract_resilient: skipping node with missing required field"
                    );
                }
                Err(fatal) => return Err(BrowserError::ExtractionFailed(fatal)),
                Ok(item) => extracted.push(item),
            }
        }

        Ok(extracted)
    }
}
1963
#[cfg(feature = "similarity")]
impl NodeHandle {
    /// Computes a structural fingerprint of this element by running a small
    /// JavaScript function on it.
    ///
    /// The script reports the lower-cased tag name, the sorted class list, the
    /// sorted attribute names (excluding `class` and `id`), and a depth that
    /// counts ancestor elements up to, but not including, `<body>`.
    ///
    /// # Errors
    ///
    /// * [`BrowserError::Timeout`] when the call exceeds `self.cdp_timeout`.
    /// * The error produced by `cdp_err_or_stale` when the call itself fails.
    /// * [`BrowserError::ScriptExecutionFailed`] when no string value comes
    ///   back or the JSON cannot be deserialised.
    pub async fn fingerprint(&self) -> Result<crate::similarity::ElementFingerprint> {
        const JS: &str = r"function() {
    var el = this;
    var tag = el.tagName.toLowerCase();
    var classes = Array.prototype.slice.call(el.classList).sort();
    var attrNames = Array.prototype.slice.call(el.attributes)
        .map(function(a) { return a.name; })
        .filter(function(n) { return n !== 'class' && n !== 'id'; })
        .sort();
    var depth = 0;
    var n = el.parentElement;
    while (n && n.tagName.toLowerCase() !== 'body') { depth++; n = n.parentElement; }
    return JSON.stringify({ tag: tag, classes: classes, attrNames: attrNames, depth: depth });
}";

        let call = self.element.call_js_fn(JS, true);
        let returns = match tokio::time::timeout(self.cdp_timeout, call).await {
            Ok(outcome) => outcome.map_err(|e| self.cdp_err_or_stale(&e, "fingerprint"))?,
            Err(_elapsed) => {
                return Err(BrowserError::Timeout {
                    operation: "NodeHandle::fingerprint".to_string(),
                    duration_ms: u64::try_from(self.cdp_timeout.as_millis()).unwrap_or(u64::MAX),
                });
            }
        };

        // The script returns a JSON string; anything else is treated as a failure.
        let Some(json_str) = returns.result.value.as_ref().and_then(|v| v.as_str()) else {
            return Err(BrowserError::ScriptExecutionFailed {
                script: "NodeHandle::fingerprint".to_string(),
                reason: "CDP returned no string value from fingerprint script".to_string(),
            });
        };

        match serde_json::from_str::<crate::similarity::ElementFingerprint>(json_str) {
            Ok(fingerprint) => Ok(fingerprint),
            Err(e) => Err(BrowserError::ScriptExecutionFailed {
                script: "NodeHandle::fingerprint".to_string(),
                reason: format!("failed to deserialise fingerprint JSON: {e}"),
            }),
        }
    }
}
2018
#[cfg(feature = "similarity")]
impl PageHandle {
    /// Finds elements on the page whose fingerprint is similar to `reference`.
    ///
    /// Every element matched by the `*` selector is fingerprinted and scored
    /// with `jaccard_weighted`; candidates scoring at least `config.threshold`
    /// are kept. Candidates whose fingerprint cannot be computed are skipped.
    /// Results are ordered by descending score and, when `config.max_results`
    /// is greater than zero, cut down to that many entries.
    ///
    /// # Errors
    ///
    /// Propagates a failure to fingerprint `reference` or to run the selector
    /// query.
    pub async fn find_similar(
        &self,
        reference: &NodeHandle,
        config: crate::similarity::SimilarityConfig,
    ) -> Result<Vec<crate::similarity::SimilarMatch>> {
        use crate::similarity::{SimilarMatch, jaccard_weighted};

        let ref_fp = reference.fingerprint().await?;
        let candidates = self.query_selector_all("*").await?;

        let mut matches: Vec<SimilarMatch> = Vec::new();
        for node in candidates {
            // Best effort: a candidate that cannot be fingerprinted is ignored.
            let Ok(cand_fp) = node.fingerprint().await else {
                continue;
            };
            let score = jaccard_weighted(&ref_fp, &cand_fp);
            if score >= config.threshold {
                matches.push(SimilarMatch { node, score });
            }
        }

        // Highest score first; incomparable scores are treated as equal.
        matches.sort_by(|lhs, rhs| match rhs.score.partial_cmp(&lhs.score) {
            Some(ordering) => ordering,
            None => std::cmp::Ordering::Equal,
        });

        if config.max_results > 0 {
            matches.truncate(config.max_results);
        }

        Ok(matches)
    }
}
2087
2088impl Drop for PageHandle {
2089 fn drop(&mut self) {
2090 warn!("PageHandle dropped without explicit close(); spawning cleanup task");
2091 let page = self.page.clone();
2094 tokio::spawn(async move {
2095 let _ = page.close().await;
2096 });
2097 }
2098}
2099
/// Wait condition used by warm-up and refresh navigations.
///
/// Serialized with `snake_case` variant names and converted to [`WaitUntil`]
/// through [`WarmupWait::into_wait_until`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum WarmupWait {
    /// Maps to [`WaitUntil::DomContentLoaded`]; this is the default.
    #[default]
    DomContentLoaded,
    /// Maps to [`WaitUntil::NetworkIdle`].
    NetworkIdle,
}
2119
2120impl WarmupWait {
2121 #[must_use]
2123 pub const fn into_wait_until(self) -> WaitUntil {
2124 match self {
2125 Self::DomContentLoaded => WaitUntil::DomContentLoaded,
2126 Self::NetworkIdle => WaitUntil::NetworkIdle,
2127 }
2128 }
2129}
2130
/// Options accepted by `PageHandle::warmup`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WarmupOptions {
    /// URL that the warm-up navigates to.
    pub url: String,
    /// Wait condition applied to the navigation; defaults to
    /// [`WarmupWait::DomContentLoaded`] when absent from the serialized form.
    #[serde(default)]
    pub wait: WarmupWait,
    /// Navigation timeout in milliseconds; defaults to
    /// [`WarmupOptions::default_timeout_ms`] when absent.
    #[serde(default = "WarmupOptions::default_timeout_ms")]
    pub timeout_ms: u64,
    /// Extra time, in milliseconds, to sleep after navigation. Zero (the
    /// default) disables the sleep.
    #[serde(default)]
    pub stabilize_ms: u64,
}
2163
2164impl WarmupOptions {
2165 #[must_use]
2167 pub const fn default_timeout_ms() -> u64 {
2168 30_000
2169 }
2170}
2171
2172impl Default for WarmupOptions {
2173 fn default() -> Self {
2174 Self {
2175 url: String::new(),
2176 wait: WarmupWait::DomContentLoaded,
2177 timeout_ms: Self::default_timeout_ms(),
2178 stabilize_ms: 0,
2179 }
2180 }
2181}
2182
/// Outcome of a `PageHandle::warmup` call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WarmupReport {
    /// The URL that was requested (copied from the options).
    pub url: String,
    /// Time spent in the whole warm-up call, in milliseconds, including any
    /// stabilization sleep.
    pub elapsed_ms: u64,
    /// Status code reported by the page handle after navigation, if any.
    pub status_code: Option<u16>,
    /// Page title after navigation; empty when the title could not be read.
    pub title: String,
    /// `true` when a non-zero `stabilize_ms` was requested.
    pub stabilized: bool,
}
2213
/// Options accepted by `PageHandle::refresh`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RefreshOptions {
    /// Wait condition applied to the re-navigation; defaults to
    /// [`WarmupWait::DomContentLoaded`] when absent from the serialized form.
    #[serde(default)]
    pub wait: WarmupWait,
    /// Navigation timeout in milliseconds; defaults to
    /// [`RefreshOptions::default_timeout_ms`] when absent.
    #[serde(default = "RefreshOptions::default_timeout_ms")]
    pub timeout_ms: u64,
    /// Defaults to `false`.
    // NOTE(review): `PageHandle::refresh` in this file does not read this
    // flag — confirm whether it is consumed elsewhere or still to be wired up.
    #[serde(default)]
    pub reset_connection: bool,
}
2243
2244impl RefreshOptions {
2245 #[must_use]
2247 pub const fn default_timeout_ms() -> u64 {
2248 30_000
2249 }
2250}
2251
2252impl Default for RefreshOptions {
2253 fn default() -> Self {
2254 Self {
2255 wait: WarmupWait::DomContentLoaded,
2256 timeout_ms: Self::default_timeout_ms(),
2257 reset_connection: false,
2258 }
2259 }
2260}
2261
/// Outcome of a `PageHandle::refresh` call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RefreshReport {
    /// URL of the page as read back after the re-navigation.
    pub url: String,
    /// Time spent in the whole refresh call, in milliseconds.
    pub elapsed_ms: u64,
    /// Status code reported by the page handle after navigation, if any.
    pub status_code: Option<u16>,
}
2284
2285impl PageHandle {
2288 pub async fn warmup(&mut self, options: WarmupOptions) -> Result<WarmupReport> {
2322 let start = std::time::Instant::now();
2323 let nav_timeout = Duration::from_millis(options.timeout_ms);
2324 self.navigate(
2325 &options.url,
2326 options.wait.clone().into_wait_until(),
2327 nav_timeout,
2328 )
2329 .await?;
2330 let status_code = self.status_code()?;
2331 let title = self.title().await.unwrap_or_default();
2332 let stabilized = options.stabilize_ms > 0;
2333 if stabilized {
2334 tokio::time::sleep(Duration::from_millis(options.stabilize_ms)).await;
2335 }
2336 let elapsed_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
2337 Ok(WarmupReport {
2338 url: options.url,
2339 elapsed_ms,
2340 status_code,
2341 title,
2342 stabilized,
2343 })
2344 }
2345
2346 pub async fn refresh(&mut self, options: RefreshOptions) -> Result<RefreshReport> {
2384 let start = std::time::Instant::now();
2385 let nav_timeout = Duration::from_millis(options.timeout_ms);
2386 let wait = options.wait.clone().into_wait_until();
2387 let current_url = self.url().await?;
2389 if current_url.is_empty() || current_url == "about:blank" {
2390 return Err(BrowserError::NavigationFailed {
2391 url: current_url,
2392 reason: "page has not been navigated yet; call warmup() or navigate() first"
2393 .to_string(),
2394 });
2395 }
2396 self.navigate(¤t_url, wait, nav_timeout).await?;
2400 let status_code = self.status_code()?;
2401 let url = self.url().await?;
2402 let elapsed_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
2403 Ok(RefreshReport {
2404 url,
2405 elapsed_ms,
2406 status_code,
2407 })
2408 }
2409}
2410
/// Numeric node-type codes matched against a CDP DOM node's `node_type` when
/// serializing a node tree. The values follow the DOM `Node.nodeType`
/// numbering.
mod node_type {
    /// Element node.
    pub const ELEMENT: i64 = 1;
    /// Text node.
    pub const TEXT: i64 = 3;
    /// CDATA section node.
    pub const CDATA_SECTION: i64 = 4;
    /// Processing-instruction node.
    pub const PROCESSING_INSTRUCTION: i64 = 7;
    /// Comment node.
    pub const COMMENT: i64 = 8;
    /// Document node.
    pub const DOCUMENT: i64 = 9;
    /// Document-type (doctype) node.
    pub const DOCUMENT_TYPE: i64 = 10;
    /// Document-fragment node.
    pub const DOCUMENT_FRAGMENT: i64 = 11;
}
2432
/// Tag names (compared against a node's `local_name`) that the serializer
/// emits as a bare start tag, with no children and no closing tag.
const VOID_ELEMENTS: &[&str] = &[
    "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param",
    "source", "track", "wbr",
];
2438
2439fn serialize_node_tree(node: &chromiumoxide::cdp::browser_protocol::dom::Node) -> String {
2467 let mut out = String::new();
2468 serialize_node_into(&mut out, node);
2469 out
2470}
2471
2472fn serialize_node_into(out: &mut String, node: &chromiumoxide::cdp::browser_protocol::dom::Node) {
2473 match node.node_type {
2474 node_type::ELEMENT => {
2475 let tag = node.local_name.as_str();
2476 out.push('<');
2477 out.push_str(tag);
2478 if let Some(attrs) = &node.attributes {
2479 for pair in attrs.chunks_exact(2) {
2480 if let [name, value] = pair {
2481 out.push(' ');
2482 escape_attr_name(out, name);
2483 out.push_str("=\"");
2484 escape_attr_value(out, value);
2485 out.push('"');
2486 }
2487 }
2488 }
2489 if VOID_ELEMENTS.contains(&tag) {
2490 out.push('>');
2491 return;
2492 }
2493 out.push('>');
2494 serialize_inline_children(out, node);
2495 out.push_str("</");
2496 out.push_str(tag);
2497 out.push('>');
2498 }
2499 node_type::TEXT => {
2500 escape_text(out, &node.node_value);
2501 }
2502 node_type::COMMENT => {
2503 out.push_str("<!--");
2504 out.push_str(&node.node_value);
2505 out.push_str("-->");
2506 }
2507 node_type::DOCUMENT | node_type::DOCUMENT_FRAGMENT => {
2508 serialize_inline_children(out, node);
2509 }
2510 node_type::DOCUMENT_TYPE => {
2511 out.push_str("<!DOCTYPE ");
2512 out.push_str(&node.node_name);
2513 if let Some(public_id) = &node.public_id {
2514 out.push(' ');
2515 out.push_str(public_id);
2516 }
2517 if let Some(system_id) = &node.system_id {
2518 out.push(' ');
2519 out.push_str(system_id);
2520 }
2521 out.push('>');
2522 }
2523 node_type::CDATA_SECTION => {
2524 out.push_str("<![CDATA[");
2525 out.push_str(&node.node_value);
2526 out.push_str("]]>");
2527 }
2528 node_type::PROCESSING_INSTRUCTION => {
2529 out.push_str("<?");
2530 out.push_str(&node.node_name);
2531 if !node.node_value.is_empty() {
2532 out.push(' ');
2533 out.push_str(&node.node_value);
2534 }
2535 out.push_str("?>");
2536 }
2537 _ => {
2538 if !node.node_value.is_empty() {
2539 escape_text(out, &node.node_value);
2540 }
2541 }
2542 }
2543}
2544
2545fn serialize_inline_children(
2549 out: &mut String,
2550 node: &chromiumoxide::cdp::browser_protocol::dom::Node,
2551) {
2552 if let Some(children) = &node.children {
2553 for child in children {
2554 serialize_node_into(out, child);
2555 }
2556 }
2557 if let Some(template_content) = &node.template_content {
2558 serialize_node_into(out, template_content);
2559 }
2560 if let Some(shadow_roots) = &node.shadow_roots {
2561 for shadow in shadow_roots {
2562 serialize_node_into(out, shadow);
2563 }
2564 }
2565 if let Some(content_document) = &node.content_document {
2566 serialize_node_into(out, content_document);
2567 }
2568}
2569
/// Appends `value` to `out` as HTML character data, replacing `&`, `<` and
/// `>` with `&amp;`, `&lt;` and `&gt;`. All other characters are copied
/// unchanged.
fn escape_text(out: &mut String, value: &str) {
    for ch in value.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            _ => out.push(ch),
        }
    }
}
2581
/// Appends an attribute name to `out`, replacing `&`, `<` and `"` with
/// `&amp;`, `&lt;` and `&quot;` so an unusual name cannot break out of the
/// start tag. All other characters are copied unchanged.
fn escape_attr_name(out: &mut String, value: &str) {
    for ch in value.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '"' => out.push_str("&quot;"),
            _ => out.push(ch),
        }
    }
}
2594
/// Appends a double-quoted attribute value to `out`, replacing `&`, `<` and
/// `"` with `&amp;`, `&lt;` and `&quot;`. All other characters (including
/// `>`) are copied unchanged.
fn escape_attr_value(out: &mut String, value: &str) {
    for ch in value.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '"' => out.push_str("&quot;"),
            _ => out.push(ch),
        }
    }
}
2606
2607#[cfg(test)]
2610mod tests {
2611 use super::*;
2612
2613 #[test]
2614 fn resource_filter_block_media_blocks_image() {
2615 let filter = ResourceFilter::block_media();
2616 assert!(filter.should_block("Image"));
2617 assert!(filter.should_block("Font"));
2618 assert!(filter.should_block("Stylesheet"));
2619 assert!(filter.should_block("Media"));
2620 assert!(!filter.should_block("Script"));
2621 assert!(!filter.should_block("XHR"));
2622 }
2623
2624 #[test]
2625 fn resource_filter_case_insensitive() {
2626 let filter = ResourceFilter::block_images_and_fonts();
2627 assert!(filter.should_block("image")); assert!(filter.should_block("IMAGE")); assert!(!filter.should_block("Stylesheet"));
2630 }
2631
2632 #[test]
2633 fn resource_filter_builder_chain() {
2634 let filter = ResourceFilter::default()
2635 .block(ResourceType::Image)
2636 .block(ResourceType::Font);
2637 assert!(filter.should_block("Image"));
2638 assert!(filter.should_block("Font"));
2639 assert!(!filter.should_block("Stylesheet"));
2640 }
2641
2642 #[test]
2643 fn resource_filter_dedup_block() {
2644 let filter = ResourceFilter::default()
2645 .block(ResourceType::Image)
2646 .block(ResourceType::Image); assert_eq!(filter.blocked.len(), 1);
2648 }
2649
2650 #[test]
2651 fn resource_filter_is_empty_when_default() {
2652 assert!(ResourceFilter::default().is_empty());
2653 assert!(!ResourceFilter::block_media().is_empty());
2654 }
2655
2656 #[test]
2657 fn wait_until_selector_stores_string() {
2658 let w = WaitUntil::Selector("#foo".to_string());
2659 assert!(matches!(w, WaitUntil::Selector(ref s) if s == "#foo"));
2660 }
2661
2662 #[test]
2663 fn resource_type_cdp_str() {
2664 assert_eq!(ResourceType::Image.as_cdp_str(), "Image");
2665 assert_eq!(ResourceType::Font.as_cdp_str(), "Font");
2666 assert_eq!(ResourceType::Stylesheet.as_cdp_str(), "Stylesheet");
2667 assert_eq!(ResourceType::Media.as_cdp_str(), "Media");
2668 }
2669
2670 #[test]
2671 fn page_handle_is_send_sync() {
2672 fn assert_send<T: Send>() {}
2673 fn assert_sync<T: Sync>() {}
2674 assert_send::<PageHandle>();
2675 assert_sync::<PageHandle>();
2676 }
2677
2678 #[cfg(feature = "extract")]
2681 #[test]
2682 fn extraction_error_missing_is_skippable() {
2683 use crate::extract::ExtractionError;
2684
2685 let missing = ExtractionError::Missing {
2686 field: "title",
2687 selector: "h1",
2688 };
2689 assert!(
2690 matches!(missing, ExtractionError::Missing { .. }),
2691 "ExtractionError::Missing should be the skip variant"
2692 );
2693
2694 let nested = ExtractionError::Nested {
2696 field: "link",
2697 source: Box::new(ExtractionError::Missing {
2698 field: "href",
2699 selector: "a",
2700 }),
2701 };
2702 assert!(
2703 !matches!(nested, ExtractionError::Missing { .. }),
2704 "ExtractionError::Nested must not match Missing"
2705 );
2706 }
2707
2708 #[test]
2710 fn status_code_sentinel_zero_maps_to_none() {
2711 use std::sync::atomic::{AtomicU16, Ordering};
2712 let atom = AtomicU16::new(0);
2713 let code = atom.load(Ordering::Acquire);
2714 assert_eq!(if code == 0 { None } else { Some(code) }, None::<u16>);
2715 }
2716
2717 #[test]
2718 fn status_code_non_zero_maps_to_some() {
2719 use std::sync::atomic::{AtomicU16, Ordering};
2720 for &expected in &[200u16, 301, 404, 503] {
2721 let atom = AtomicU16::new(expected);
2722 let code = atom.load(Ordering::Acquire);
2723 assert_eq!(if code == 0 { None } else { Some(code) }, Some(expected));
2724 }
2725 }
2726
2727 #[test]
2732 fn attr_map_chunking_pairs_correctly() {
2733 let flat = [
2734 "id".to_string(),
2735 "main".to_string(),
2736 "data-ux".to_string(),
2737 "Section".to_string(),
2738 "class".to_string(),
2739 "container".to_string(),
2740 ];
2741 let mut map = std::collections::HashMap::with_capacity(flat.len() / 2);
2742 for pair in flat.chunks_exact(2) {
2743 if let [name, value] = pair {
2744 map.insert(name.clone(), value.clone());
2745 }
2746 }
2747 assert_eq!(map.get("id").map(String::as_str), Some("main"));
2748 assert_eq!(map.get("data-ux").map(String::as_str), Some("Section"));
2749 assert_eq!(map.get("class").map(String::as_str), Some("container"));
2750 assert_eq!(map.len(), 3);
2751 }
2752
2753 #[test]
2755 fn attr_map_chunking_ignores_odd_trailing() {
2756 let flat = ["orphan".to_string()]; let mut map = std::collections::HashMap::new();
2758 for pair in flat.chunks_exact(2) {
2759 if let [name, value] = pair {
2760 map.insert(name.clone(), value.clone());
2761 }
2762 }
2763 assert!(map.is_empty());
2764 }
2765
2766 #[test]
2768 fn attr_map_chunking_empty_input() {
2769 let flat: Vec<String> = vec![];
2770 let map: std::collections::HashMap<String, String> = flat
2771 .chunks_exact(2)
2772 .filter_map(|pair| {
2773 if let [name, value] = pair {
2774 Some((name.clone(), value.clone()))
2775 } else {
2776 None
2777 }
2778 })
2779 .collect();
2780 assert!(map.is_empty());
2781 }
2782
2783 #[test]
2784 fn ancestors_json_parse_round_trip() -> std::result::Result<(), serde_json::Error> {
2785 let json = r#"["p","article","body","html"]"#;
2786 let result: Vec<String> = serde_json::from_str(json)?;
2787 assert_eq!(result, ["p", "article", "body", "html"]);
2788 Ok(())
2789 }
2790
2791 #[test]
2792 fn ancestors_json_parse_empty() -> std::result::Result<(), serde_json::Error> {
2793 let json = "[]";
2794 let result: Vec<String> = serde_json::from_str(json)?;
2795 assert!(result.is_empty());
2796 Ok(())
2797 }
2798
2799 #[test]
2802 fn traversal_selector_suffix_in_stale_error() {
2803 let e = crate::error::BrowserError::StaleNode {
2804 selector: "div::parent".to_string(),
2805 };
2806 let msg = e.to_string();
2807 assert!(
2808 msg.contains("div::parent"),
2809 "StaleNode display must include the full selector; got: {msg}"
2810 );
2811 }
2812
2813 #[test]
2814 fn traversal_next_suffix_in_stale_error() {
2815 let e = crate::error::BrowserError::StaleNode {
2816 selector: "li.price::next".to_string(),
2817 };
2818 assert!(e.to_string().contains("li.price::next"));
2819 }
2820
2821 #[test]
2822 fn traversal_prev_suffix_in_stale_error() {
2823 let e = crate::error::BrowserError::StaleNode {
2824 selector: "td.label::prev".to_string(),
2825 };
2826 assert!(e.to_string().contains("td.label::prev"));
2827 }
2828
2829 #[test]
2832 fn outer_html_strategy_default_is_current() {
2833 assert_eq!(OuterHtmlStrategy::default(), OuterHtmlStrategy::Current);
2834 }
2835
2836 #[test]
2837 fn outer_html_strategy_as_str_matches_variant() {
2838 assert_eq!(OuterHtmlStrategy::Current.as_str(), "Current");
2839 assert_eq!(OuterHtmlStrategy::Recursive.as_str(), "Recursive");
2840 }
2841
2842 #[test]
2843 fn outer_html_strategy_display_matches_as_str() {
2844 assert_eq!(
2845 format!("{}", OuterHtmlStrategy::Current),
2846 OuterHtmlStrategy::Current.as_str()
2847 );
2848 assert_eq!(
2849 format!("{}", OuterHtmlStrategy::Recursive),
2850 OuterHtmlStrategy::Recursive.as_str()
2851 );
2852 }
2853
2854 #[test]
2855 fn outer_html_strategy_is_copy_and_eq() {
2856 let s = OuterHtmlStrategy::Recursive;
2857 let copy = s;
2858 assert_eq!(s, copy);
2859 assert_eq!(s, OuterHtmlStrategy::Recursive);
2860 assert_ne!(s, OuterHtmlStrategy::Current);
2861 }
2862
2863 #[test]
2864 fn outer_html_strategy_all_iterates_both_variants() {
2865 let all = OuterHtmlStrategy::all();
2866 assert_eq!(all.len(), 2);
2867 assert_eq!(all[0], OuterHtmlStrategy::Current);
2868 assert_eq!(all[1], OuterHtmlStrategy::Recursive);
2869 }
2870
2871 #[test]
2872 fn outer_html_strategy_serialize_round_trip()
2873 -> std::result::Result<(), Box<dyn std::error::Error>> {
2874 for variant in OuterHtmlStrategy::all() {
2875 let json = serde_json::to_string(&variant)?;
2876 let restored: OuterHtmlStrategy = serde_json::from_str(&json)?;
2877 assert_eq!(restored, variant);
2878 }
2879 Ok(())
2880 }
2881
2882 #[test]
2883 fn outer_html_result_content_returns_some_for_content() {
2884 let r = OuterHtmlResult::Content("<div/>".to_string());
2885 assert_eq!(r.content(), Some("<div/>"));
2886 }
2887
2888 #[test]
2889 fn outer_html_result_content_returns_none_for_empty() {
2890 assert_eq!(OuterHtmlResult::Empty.content(), None);
2891 }
2892
2893 #[test]
2894 fn outer_html_result_content_returns_none_for_failed() {
2895 let r = OuterHtmlResult::Failed {
2896 backends: vec!["DOM.getOuterHTML"],
2897 };
2898 assert_eq!(r.content(), None);
2899 }
2900
2901 #[test]
2902 fn outer_html_result_is_empty_variants() {
2903 assert!(OuterHtmlResult::Empty.is_empty());
2904 assert!(
2905 OuterHtmlResult::Failed {
2906 backends: vec!["a"]
2907 }
2908 .is_empty()
2909 );
2910 assert!(!OuterHtmlResult::Content("<x/>".to_string()).is_empty());
2911 assert!(OuterHtmlResult::Content(String::new()).is_empty());
2912 }
2913
2914 #[test]
2915 fn outer_html_result_display_includes_state() {
2916 assert_eq!(format!("{}", OuterHtmlResult::Empty), "Empty");
2917 assert_eq!(
2918 format!("{}", OuterHtmlResult::Content("<div/>".to_string())),
2919 "Content(6 bytes)"
2920 );
2921 let failed = OuterHtmlResult::Failed {
2922 backends: vec!["DOM.getOuterHTML", "DOM.describeNode-walk"],
2923 };
2924 let s = format!("{failed}");
2925 assert!(s.contains("DOM.getOuterHTML"));
2926 assert!(s.contains("DOM.describeNode-walk"));
2927 }
2928
2929 #[test]
2930 fn outer_html_result_serializes_each_variant()
2931 -> std::result::Result<(), Box<dyn std::error::Error>> {
2932 let empty_json = serde_json::to_string(&OuterHtmlResult::Empty)?;
2933 assert_eq!(empty_json, "\"Empty\"");
2934
2935 let content_json =
2936 serde_json::to_string(&OuterHtmlResult::Content("<p>x</p>".to_string()))?;
2937 assert_eq!(content_json, r#"{"Content":"<p>x</p>"}"#);
2938
2939 let failed_json = serde_json::to_string(&OuterHtmlResult::Failed {
2940 backends: vec!["DOM.getOuterHTML", "DOM.describeNode-walk"],
2941 })?;
2942 assert_eq!(
2943 failed_json,
2944 r#"{"Failed":{"backends":["DOM.getOuterHTML","DOM.describeNode-walk"]}}"#
2945 );
2946 Ok(())
2947 }
2948
2949 use chromiumoxide::cdp::browser_protocol::dom::{BackendNodeId, Node, NodeId};
2952
2953 fn mk_node(
2954 node_type: i64,
2955 local_name: &str,
2956 node_name: &str,
2957 node_value: &str,
2958 attributes: Option<Vec<String>>,
2959 children: Option<Vec<Node>>,
2960 ) -> Node {
2961 Node {
2962 node_id: NodeId::default(),
2963 parent_id: None,
2964 backend_node_id: BackendNodeId::default(),
2965 node_type,
2966 node_name: node_name.to_string(),
2967 local_name: local_name.to_string(),
2968 node_value: node_value.to_string(),
2969 child_node_count: None,
2970 children,
2971 attributes,
2972 document_url: None,
2973 base_url: None,
2974 public_id: None,
2975 system_id: None,
2976 internal_subset: None,
2977 xml_version: None,
2978 name: None,
2979 value: None,
2980 pseudo_type: None,
2981 pseudo_identifier: None,
2982 shadow_root_type: None,
2983 frame_id: None,
2984 content_document: None,
2985 shadow_roots: None,
2986 template_content: None,
2987 pseudo_elements: None,
2988 distributed_nodes: None,
2989 is_svg: None,
2990 compatibility_mode: None,
2991 assigned_slot: None,
2992 is_scrollable: None,
2993 affected_by_starting_styles: None,
2994 adopted_style_sheets: None,
2995 }
2996 }
2997
2998 #[test]
2999 fn serialize_element_with_text_child() {
3000 let text = mk_node(node_type::TEXT, "", "", "hello", None, None);
3001 let div = mk_node(node_type::ELEMENT, "div", "DIV", "", None, Some(vec![text]));
3002 assert_eq!(serialize_node_tree(&div), "<div>hello</div>");
3003 }
3004
3005 #[test]
3006 fn serialize_element_with_attributes() {
3007 let div = mk_node(
3008 node_type::ELEMENT,
3009 "div",
3010 "DIV",
3011 "",
3012 Some(vec![
3013 "id".into(),
3014 "main".into(),
3015 "class".into(),
3016 "container wide".into(),
3017 ]),
3018 None,
3019 );
3020 assert_eq!(
3021 serialize_node_tree(&div),
3022 r#"<div id="main" class="container wide"></div>"#
3023 );
3024 }
3025
3026 #[test]
3027 fn serialize_void_element_emits_self_closing() {
3028 let img = mk_node(
3029 node_type::ELEMENT,
3030 "img",
3031 "IMG",
3032 "",
3033 Some(vec!["src".into(), "/a.png".into()]),
3034 None,
3035 );
3036 assert_eq!(serialize_node_tree(&img), r#"<img src="/a.png">"#);
3037 let br = mk_node(node_type::ELEMENT, "br", "BR", "", None, None);
3038 assert_eq!(serialize_node_tree(&br), "<br>");
3039 }
3040
3041 #[test]
3042 fn serialize_nested_elements() {
3043 let p = mk_node(
3044 node_type::ELEMENT,
3045 "p",
3046 "P",
3047 "",
3048 None,
3049 Some(vec![mk_node(
3050 node_type::TEXT,
3051 "",
3052 "",
3053 "Mesh content here",
3054 None,
3055 None,
3056 )]),
3057 );
3058 let section = mk_node(
3059 node_type::ELEMENT,
3060 "section",
3061 "SECTION",
3062 "",
3063 None,
3064 Some(vec![p]),
3065 );
3066 let html = serialize_node_tree(§ion);
3067 assert_eq!(html, "<section><p>Mesh content here</p></section>");
3068 }
3069
3070 #[test]
3071 fn serialize_text_escapes_special_chars() {
3072 let n = mk_node(node_type::TEXT, "", "", "a < b && c > d", None, None);
3073 assert_eq!(serialize_node_tree(&n), "a < b && c > d");
3074 }
3075
3076 #[test]
3077 fn serialize_attribute_value_escapes_quotes_and_amp() {
3078 let div = mk_node(
3079 node_type::ELEMENT,
3080 "div",
3081 "DIV",
3082 "",
3083 Some(vec!["title".into(), "a & b \"c\"".into()]),
3084 None,
3085 );
3086 assert_eq!(
3087 serialize_node_tree(&div),
3088 r#"<div title="a & b "c""></div>"#
3089 );
3090 }
3091
3092 #[test]
3093 fn serialize_attribute_name_escapes_special_chars() {
3094 let div = mk_node(
3095 node_type::ELEMENT,
3096 "div",
3097 "DIV",
3098 "",
3099 Some(vec!["weird<\"&".into(), "v".into()]),
3100 None,
3101 );
3102 assert_eq!(
3103 serialize_node_tree(&div),
3104 r#"<div weird<"&="v"></div>"#
3105 );
3106 }
3107
3108 #[test]
3109 fn serialize_comment_node() {
3110 let n = mk_node(node_type::COMMENT, "", "", " a comment ", None, None);
3111 assert_eq!(serialize_node_tree(&n), "<!-- a comment -->");
3112 }
3113
3114 #[test]
3115 fn serialize_document_root_flattens_children() {
3116 let html = mk_node(
3117 node_type::ELEMENT,
3118 "html",
3119 "HTML",
3120 "",
3121 None,
3122 Some(vec![mk_node(
3123 node_type::ELEMENT,
3124 "body",
3125 "BODY",
3126 "",
3127 None,
3128 None,
3129 )]),
3130 );
3131 let doc = mk_node(
3132 node_type::DOCUMENT,
3133 "",
3134 "#document",
3135 "",
3136 None,
3137 Some(vec![html]),
3138 );
3139 assert_eq!(serialize_node_tree(&doc), "<html><body></body></html>");
3140 }
3141
3142 #[test]
3143 fn serialize_document_fragment_root_flattens_children() {
3144 let span = mk_node(
3145 node_type::ELEMENT,
3146 "span",
3147 "SPAN",
3148 "",
3149 None,
3150 Some(vec![mk_node(node_type::TEXT, "", "", "x", None, None)]),
3151 );
3152 let frag = mk_node(
3153 node_type::DOCUMENT_FRAGMENT,
3154 "",
3155 "#document-fragment",
3156 "",
3157 None,
3158 Some(vec![span]),
3159 );
3160 assert_eq!(serialize_node_tree(&frag), "<span>x</span>");
3161 }
3162
3163 #[test]
3164 fn serialize_doctype_node() {
3165 let dt = Node {
3166 public_id: Some("-//W3C//DTD HTML 4.01//EN".to_string()),
3167 system_id: Some("http://www.w3.org/TR/html4/strict.dtd".to_string()),
3168 ..mk_node(node_type::DOCUMENT_TYPE, "", "html", "", None, None)
3169 };
3170 assert_eq!(
3171 serialize_node_tree(&dt),
3172 "<!DOCTYPE html -//W3C//DTD HTML 4.01//EN http://www.w3.org/TR/html4/strict.dtd>"
3173 );
3174 }
3175
3176 #[test]
3177 fn serialize_doctype_node_no_ids() {
3178 let dt = mk_node(node_type::DOCUMENT_TYPE, "", "html", "", None, None);
3179 assert_eq!(serialize_node_tree(&dt), "<!DOCTYPE html>");
3180 }
3181
3182 #[test]
3183 fn serialize_cdata_section() {
3184 let n = mk_node(node_type::CDATA_SECTION, "", "", "raw & <data>", None, None);
3185 assert_eq!(serialize_node_tree(&n), "<![CDATA[raw & <data>]]>");
3186 }
3187
3188 #[test]
3189 fn serialize_processing_instruction() {
3190 let n = mk_node(
3191 node_type::PROCESSING_INSTRUCTION,
3192 "",
3193 "xml-stylesheet",
3194 "href=\"style.css\"",
3195 None,
3196 None,
3197 );
3198 assert_eq!(
3199 serialize_node_tree(&n),
3200 "<?xml-stylesheet href=\"style.css\"?>"
3201 );
3202 }
3203
3204 #[test]
3205 fn serialize_template_inlines_template_content() {
3206 let inner = mk_node(
3207 node_type::ELEMENT,
3208 "span",
3209 "SPAN",
3210 "",
3211 None,
3212 Some(vec![mk_node(node_type::TEXT, "", "", "tmpl", None, None)]),
3213 );
3214 let mut tmpl = mk_node(node_type::ELEMENT, "template", "TEMPLATE", "", None, None);
3215 tmpl.template_content = Some(Box::new(inner));
3216 assert_eq!(
3217 serialize_node_tree(&tmpl),
3218 "<template><span>tmpl</span></template>"
3219 );
3220 }
3221
3222 #[test]
3223 fn serialize_shadow_roots_inlined_into_host() {
3224 let shadow_text = mk_node(node_type::TEXT, "", "", "shadow-text", None, None);
3225 let shadow = Node {
3226 shadow_root_type: Some(chromiumoxide::cdp::browser_protocol::dom::ShadowRootType::Open),
3227 ..mk_node(
3228 node_type::DOCUMENT_FRAGMENT,
3229 "",
3230 "#document-fragment",
3231 "",
3232 None,
3233 Some(vec![mk_node(
3234 node_type::ELEMENT,
3235 "span",
3236 "SPAN",
3237 "",
3238 None,
3239 Some(vec![shadow_text]),
3240 )]),
3241 )
3242 };
3243 let mut host = mk_node(
3244 node_type::ELEMENT,
3245 "div",
3246 "DIV",
3247 "",
3248 None,
3249 Some(vec![mk_node(node_type::TEXT, "", "", "light", None, None)]),
3250 );
3251 host.shadow_roots = Some(vec![shadow]);
3252 assert_eq!(
3253 serialize_node_tree(&host),
3254 "<div>light<span>shadow-text</span></div>"
3255 );
3256 }
3257
3258 #[test]
3259 fn serialize_deeply_nested_subtree() {
3260 let tag_e = mk_node(
3262 node_type::ELEMENT,
3263 "e",
3264 "E",
3265 "",
3266 None,
3267 Some(vec![mk_node(node_type::TEXT, "", "", "deep", None, None)]),
3268 );
3269 let tag_d = mk_node(node_type::ELEMENT, "d", "D", "", None, Some(vec![tag_e]));
3270 let tag_c = mk_node(node_type::ELEMENT, "c", "C", "", None, Some(vec![tag_d]));
3271 let tag_b = mk_node(node_type::ELEMENT, "b", "B", "", None, Some(vec![tag_c]));
3272 let tag_a = mk_node(node_type::ELEMENT, "a", "A", "", None, Some(vec![tag_b]));
3273 assert_eq!(
3274 serialize_node_tree(&tag_a),
3275 "<a><b><c><d><e>deep</e></d></c></b></a>"
3276 );
3277 }
3278
3279 #[test]
3280 fn serialize_element_with_text_and_element_children() {
3281 let span = mk_node(
3282 node_type::ELEMENT,
3283 "span",
3284 "SPAN",
3285 "",
3286 None,
3287 Some(vec![mk_node(node_type::TEXT, "", "", "inline", None, None)]),
3288 );
3289 let div = mk_node(
3290 node_type::ELEMENT,
3291 "div",
3292 "DIV",
3293 "",
3294 None,
3295 Some(vec![
3296 mk_node(node_type::TEXT, "", "", "before", None, None),
3297 span,
3298 mk_node(node_type::TEXT, "", "", "after", None, None),
3299 ]),
3300 );
3301 assert_eq!(
3302 serialize_node_tree(&div),
3303 "<div>before<span>inline</span>after</div>"
3304 );
3305 }
3306
3307 #[test]
3308 fn serialize_attribute_pairs_drop_orphans() {
3309 let div = mk_node(
3311 node_type::ELEMENT,
3312 "div",
3313 "DIV",
3314 "",
3315 Some(vec!["orphan".into()]),
3316 None,
3317 );
3318 assert_eq!(serialize_node_tree(&div), "<div></div>");
3320 }
3321
3322 #[test]
3325 fn warmup_options_defaults() {
3326 let opts = WarmupOptions::default();
3327 assert_eq!(opts.wait, WarmupWait::DomContentLoaded);
3328 assert_eq!(opts.timeout_ms, WarmupOptions::default_timeout_ms());
3329 assert_eq!(opts.stabilize_ms, 0);
3330 }
3331
#[test]
/// Every `WarmupOptions` field survives a JSON encode/decode cycle.
///
/// # Errors
///
/// Propagates any `serde_json` serialization or deserialization failure,
/// which fails the test.
fn warmup_options_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let original = WarmupOptions {
        url: "https://example.com".to_string(),
        wait: WarmupWait::NetworkIdle,
        timeout_ms: 15_000,
        stabilize_ms: 250,
    };

    let encoded = serde_json::to_string(&original)?;
    let decoded = serde_json::from_str::<WarmupOptions>(&encoded)?;

    assert_eq!(decoded.url, "https://example.com");
    assert_eq!(decoded.wait, WarmupWait::NetworkIdle);
    assert_eq!(decoded.timeout_ms, 15_000);
    assert_eq!(decoded.stabilize_ms, 250);
    Ok(())
}
3349
#[test]
/// The `Default` value of `WarmupWait` is the `DomContentLoaded` variant.
fn warmup_wait_default_is_dom_content_loaded() {
    let fallback = WarmupWait::default();
    assert_eq!(fallback, WarmupWait::DomContentLoaded);
}
3354
#[test]
/// `WarmupWait::into_wait_until` maps each variant onto the `WaitUntil`
/// variant of the same name.
fn warmup_wait_into_wait_until_variants() {
    let from_dom_content_loaded = WarmupWait::DomContentLoaded.into_wait_until();
    assert!(matches!(
        from_dom_content_loaded,
        WaitUntil::DomContentLoaded
    ));

    let from_network_idle = WarmupWait::NetworkIdle.into_wait_until();
    assert!(matches!(from_network_idle, WaitUntil::NetworkIdle));
}
3366
#[test]
/// `RefreshOptions::default()` waits for `DomContentLoaded`, uses
/// `RefreshOptions::default_timeout_ms()` as its timeout, and leaves
/// `reset_connection` disabled.
fn refresh_options_defaults() {
    let defaults = RefreshOptions::default();
    let expected_timeout_ms = RefreshOptions::default_timeout_ms();

    assert_eq!(defaults.wait, WarmupWait::DomContentLoaded);
    assert_eq!(defaults.timeout_ms, expected_timeout_ms);
    assert!(!defaults.reset_connection);
}
3374
#[test]
/// Every `RefreshOptions` field survives a JSON encode/decode cycle.
///
/// # Errors
///
/// Propagates any `serde_json` serialization or deserialization failure,
/// which fails the test.
fn refresh_options_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let original = RefreshOptions {
        wait: WarmupWait::NetworkIdle,
        timeout_ms: 10_000,
        reset_connection: true,
    };

    let encoded = serde_json::to_string(&original)?;
    let decoded = serde_json::from_str::<RefreshOptions>(&encoded)?;

    assert_eq!(decoded.wait, WarmupWait::NetworkIdle);
    assert_eq!(decoded.timeout_ms, 10_000);
    assert!(decoded.reset_connection);
    Ok(())
}
3390
#[test]
/// Every `WarmupReport` field survives a JSON encode/decode cycle.
///
/// # Errors
///
/// Propagates any `serde_json` serialization or deserialization failure,
/// which fails the test.
fn warmup_report_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let original = WarmupReport {
        url: "https://example.com".to_string(),
        elapsed_ms: 320,
        status_code: Some(200),
        title: "Example Domain".to_string(),
        stabilized: true,
    };

    let encoded = serde_json::to_string(&original)?;
    let decoded = serde_json::from_str::<WarmupReport>(&encoded)?;

    assert_eq!(decoded.url, "https://example.com");
    assert_eq!(decoded.elapsed_ms, 320);
    assert_eq!(decoded.status_code, Some(200));
    assert_eq!(decoded.title, "Example Domain");
    assert!(decoded.stabilized);
    Ok(())
}
3409
#[test]
/// Every `RefreshReport` field survives a JSON encode/decode cycle.
///
/// # Errors
///
/// Propagates any `serde_json` serialization or deserialization failure,
/// which fails the test.
fn refresh_report_serialize_round_trip() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let original = RefreshReport {
        url: "https://example.com/".to_string(),
        elapsed_ms: 180,
        status_code: Some(304),
    };

    let encoded = serde_json::to_string(&original)?;
    let decoded = serde_json::from_str::<RefreshReport>(&encoded)?;

    assert_eq!(decoded.url, "https://example.com/");
    assert_eq!(decoded.elapsed_ms, 180);
    assert_eq!(decoded.status_code, Some(304));
    Ok(())
}
3425
#[test]
/// A JSON payload carrying only `url` and `timeout_ms` must still
/// deserialize, with `stabilize_ms` coming back as zero.
///
/// # Errors
///
/// Propagates any `serde_json` deserialization failure, which fails the test.
fn warmup_options_missing_stabilize_ms_defaults_to_zero()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    // The payload deliberately has no `stabilize_ms` key.
    let payload = r#"{"url":"https://example.com","timeout_ms":30000}"#;
    let decoded = serde_json::from_str::<WarmupOptions>(payload)?;
    assert_eq!(decoded.stabilize_ms, 0);
    Ok(())
}
3436
#[test]
/// Live-browser check: warm a fresh page up against `https://example.com`,
/// then confirm the report is populated and the page content can be read.
///
/// Ignored by default because it needs a real Chrome instance.
#[ignore = "requires live Chrome"]
// `expect` is acceptable here: any setup failure should abort the test.
#[allow(clippy::expect_used)]
fn integration_warmup_then_extraction() {
    use crate::{BrowserConfig, BrowserPool};

    let runtime = tokio::runtime::Runtime::new().expect("tokio runtime");
    runtime.block_on(async {
        let config = BrowserConfig::default();
        let pool = BrowserPool::new(config).await.expect("pool");
        let handle = pool.acquire().await.expect("handle");
        let mut page = handle
            .browser()
            .expect("browser")
            .new_page()
            .await
            .expect("page");

        let options = WarmupOptions {
            url: "https://example.com".to_string(),
            wait: WarmupWait::DomContentLoaded,
            timeout_ms: 30_000,
            stabilize_ms: 0,
        };
        let report = page.warmup(options).await.expect("warmup");

        assert!(!report.title.is_empty(), "title populated after warmup");
        assert!(report.elapsed_ms > 0);

        // The warmed-up page must remain usable for content extraction.
        let markup = page.content().await.expect("content");
        assert!(
            markup.contains("example"),
            "page content available after warmup"
        );

        page.close().await.expect("close");
        handle.release().await;
    });
}
3482
#[test]
/// Live-browser check: navigate to `https://example.com`, refresh with the
/// default `RefreshOptions`, and confirm the reported URL still points at
/// `example.com` and a non-zero elapsed time was recorded.
///
/// Ignored by default because it needs a real Chrome instance.
#[ignore = "requires live Chrome"]
// `expect` is acceptable here: any setup failure should abort the test.
#[allow(clippy::expect_used)]
fn integration_refresh_keeps_session_state() {
    use crate::{BrowserConfig, BrowserPool};

    let runtime = tokio::runtime::Runtime::new().expect("tokio runtime");
    runtime.block_on(async {
        let config = BrowserConfig::default();
        let pool = BrowserPool::new(config).await.expect("pool");
        let handle = pool.acquire().await.expect("handle");
        let mut page = handle
            .browser()
            .expect("browser")
            .new_page()
            .await
            .expect("page");

        // Load the page first so there is something to refresh.
        let navigation_timeout = Duration::from_secs(30);
        page.navigate(
            "https://example.com",
            WaitUntil::DomContentLoaded,
            navigation_timeout,
        )
        .await
        .expect("initial navigate");

        let refresh_options = RefreshOptions::default();
        let report = page.refresh(refresh_options).await.expect("refresh");

        assert!(
            report.url.contains("example.com"),
            "URL retained after refresh; got: {}",
            report.url
        );
        assert!(report.elapsed_ms > 0);

        page.close().await.expect("close");
        handle.release().await;
    });
}
3527}