stygian_browser/extract.rs
//! Typed DOM extraction via the [`Extract`] derive macro.
//!
//! # Example
//!
//! ```ignore
//! use stygian_browser::extract::Extract;
//! use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
//! use std::time::Duration;
//!
//! #[derive(Extract)]
//! struct Headline {
//!     #[selector("h2.headline")]
//!     title: String,
//!     #[selector("a.link")]
//!     link: Option<String>,
//! }
//!
//! # async fn run() -> stygian_browser::error::Result<()> {
//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
//! let handle = pool.acquire().await?;
//! let mut page = handle.browser().expect("valid browser").new_page().await?;
//! page.navigate(
//!     "https://example.com",
//!     WaitUntil::DomContentLoaded,
//!     Duration::from_secs(30),
//! ).await?;
//! let items: Vec<Headline> = page.extract_all::<Headline>("article").await?;
//! # Ok(())
//! # }
//! ```
31
32pub use stygian_extract_derive::Extract;
33
34// ─── ExtractionError ─────────────────────────────────────────────────────────
35
36/// An error produced during `#[derive(Extract)]`-driven extraction.
37///
38/// The [`CdpFailed`][Self::CdpFailed] variant boxes its [`crate::error::BrowserError`]
39/// to avoid an infinitely sized recursive type, since
40/// `BrowserError::ExtractionFailed` can contain this enum.
41#[derive(Debug, thiserror::Error)]
42pub enum ExtractionError {
43 ///
44 /// # Example
45 ///
46 /// ```
47 /// use stygian_browser::extract::ExtractionError;
48 /// ```
49 #[error("required field `{field}` had no match for selector `{selector}`")]
50 Missing {
51 /// Name of the Rust struct field that required a match.
52 field: &'static str,
53 selector: &'static str,
54 },
55
56 /// A CDP call inside extraction failed.
57 ///
58 /// infinitely-sized recursive type (since `BrowserError` may itself contain
59 /// an `ExtractionError` via its `ExtractionFailed` variant).
60 #[error("CDP error extracting field `{field}`: {source}")]
61 CdpFailed {
62 /// Name of the struct field whose CDP call failed.
63 field: &'static str,
64 /// Underlying browser / CDP error.
65 #[source]
66 source: Box<crate::error::BrowserError>,
67 },
68
69 ///
70 /// # Example
71 ///
72 /// ```
73 /// use stygian_browser::extract::ExtractionError;
74 /// # let inner = ExtractionError::Missing { field: "link", selector: "a" };
75 /// let e = ExtractionError::Nested {
76 /// field: "link",
77 /// source: Box::new(inner),
78 /// };
79 /// ```
80 #[error("nested extraction for field `{field}` failed: {source}")]
81 Nested {
82 /// Name of the outer struct field that triggered nested extraction.
83 field: &'static str,
84 /// Inner extraction failure.
85 #[source]
86 source: Box<Self>,
87 },
88}
89
90// ─── Extractable trait ───────────────────────────────────────────────────────
91
92/// Types that can be extracted from a live DOM [`crate::page::NodeHandle`].
93///
94/// Implement this manually or derive it with `#[derive(Extract)]`.
95///
96/// # Example
97///
98/// ```ignore
99/// use stygian_browser::extract::{Extractable, ExtractionError, Extract};
100/// use stygian_browser::page::NodeHandle;
101///
102/// #[derive(Extract)]
103/// struct Title {
104/// text: String,
105/// }
106///
107/// // `PageHandle::extract_all::<Title>`.
108/// ```
109pub trait Extractable: Sized {
110 /// Extract an instance of `Self` from the given DOM node.
111 ///
112 ///
113 /// # Errors
114 ///
115 /// Returns an error when a required selector matches no element, when a
116 /// CDP call fails, or when nested extraction fails.
117 fn extract_from(
118 node: &crate::page::NodeHandle,
119 ) -> impl std::future::Future<Output = Result<Self, ExtractionError>> + Send;
120}
121
122// ─── Tests ───────────────────────────────────────────────────────────────────
123
#[cfg(test)]
mod tests {
    use super::*;

    /// `Missing` must mention both the field name and the selector.
    #[test]
    fn extraction_error_missing_display() {
        let e = ExtractionError::Missing {
            field: "foo",
            selector: ".bar",
        };
        let msg = e.to_string();
        assert!(
            msg.contains("foo"),
            "display must contain field name 'foo'; got: {msg}"
        );
        assert!(
            msg.contains(".bar"),
            "display must contain selector '.bar'; got: {msg}"
        );
    }

    /// `Nested` must mention the outer field and carry the inner message.
    #[test]
    fn extraction_error_nested_display() {
        let inner = ExtractionError::Missing {
            field: "href",
            selector: "a",
        };
        let e = ExtractionError::Nested {
            field: "link",
            source: Box::new(inner),
        };
        let msg = e.to_string();
        assert!(
            msg.contains("link"),
            "display must contain outer field name 'link'; got: {msg}",
        );
        // The format string interpolates `{source}`, so the inner failure's
        // own message must survive in the outer one.
        assert!(
            msg.contains("href"),
            "display must contain inner field name 'href'; got: {msg}",
        );
    }
}
161}