Skip to main content

stygian_browser/
extract.rs

1//! Typed DOM extraction via [`Extract`] derive macro.
2//!
3//! # Example
4//!
5//! ```ignore
6//! use stygian_browser::extract::Extract;
7//! use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
8//! use std::time::Duration;
9//!
10//! #[derive(Extract)]
11//! struct Headline {
12//!     #[selector("h2.headline")]
13//!     title: String,
14//!     #[selector("a.link")]
15//!     link: Option<String>,
16//! }
17//!
18//! # async fn run() -> stygian_browser::error::Result<()> {
19//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
20//! let handle = pool.acquire().await?;
21//! let mut page = handle.browser().expect("valid browser").new_page().await?;
22//! page.navigate(
23//!     "https://example.com",
24//!     WaitUntil::DomContentLoaded,
25//!     Duration::from_secs(30),
26//! ).await?;
27//! let items: Vec<Headline> = page.extract_all::<Headline>("article").await?;
28//! # Ok(())
29//! # }
30//! ```
31
32pub use stygian_extract_derive::Extract;
33
34// ─── ExtractionError ─────────────────────────────────────────────────────────
35
36/// An error produced during `#[derive(Extract)]`-driven extraction.
37///
38/// The [`CdpFailed`][Self::CdpFailed] variant boxes its [`crate::error::BrowserError`]
39/// to avoid an infinitely sized recursive type, since
40/// `BrowserError::ExtractionFailed` can contain this enum.
41#[derive(Debug, thiserror::Error)]
42pub enum ExtractionError {
43    ///
44    /// # Example
45    ///
46    /// ```
47    /// use stygian_browser::extract::ExtractionError;
48    /// ```
49    #[error("required field `{field}` had no match for selector `{selector}`")]
50    Missing {
51        /// Name of the Rust struct field that required a match.
52        field: &'static str,
53        selector: &'static str,
54    },
55
56    /// A CDP call inside extraction failed.
57    ///
58    /// infinitely-sized recursive type (since `BrowserError` may itself contain
59    /// an `ExtractionError` via its `ExtractionFailed` variant).
60    #[error("CDP error extracting field `{field}`: {source}")]
61    CdpFailed {
62        /// Name of the struct field whose CDP call failed.
63        field: &'static str,
64        /// Underlying browser / CDP error.
65        #[source]
66        source: Box<crate::error::BrowserError>,
67    },
68
69    ///
70    /// # Example
71    ///
72    /// ```
73    /// use stygian_browser::extract::ExtractionError;
74    /// # let inner = ExtractionError::Missing { field: "link", selector: "a" };
75    /// let e = ExtractionError::Nested {
76    ///     field: "link",
77    ///     source: Box::new(inner),
78    /// };
79    /// ```
80    #[error("nested extraction for field `{field}` failed: {source}")]
81    Nested {
82        /// Name of the outer struct field that triggered nested extraction.
83        field: &'static str,
84        /// Inner extraction failure.
85        #[source]
86        source: Box<Self>,
87    },
88}
89
90// ─── Extractable trait ───────────────────────────────────────────────────────
91
92/// Types that can be extracted from a live DOM [`crate::page::NodeHandle`].
93///
94/// Implement this manually or derive it with `#[derive(Extract)]`.
95///
96/// # Example
97///
98/// ```ignore
99/// use stygian_browser::extract::{Extractable, ExtractionError, Extract};
100/// use stygian_browser::page::NodeHandle;
101///
102/// #[derive(Extract)]
103/// struct Title {
104///     text: String,
105/// }
106///
107/// // `PageHandle::extract_all::<Title>`.
108/// ```
109pub trait Extractable: Sized {
110    /// Extract an instance of `Self` from the given DOM node.
111    ///
112    ///
113    /// # Errors
114    ///
115    /// Returns an error when a required selector matches no element, when a
116    /// CDP call fails, or when nested extraction fails.
117    fn extract_from(
118        node: &crate::page::NodeHandle,
119    ) -> impl std::future::Future<Output = Result<Self, ExtractionError>> + Send;
120}
121
122// ─── Tests ───────────────────────────────────────────────────────────────────
123
#[cfg(test)]
mod tests {
    use super::*;

    /// `Missing` must render both the field name and the selector.
    #[test]
    fn extraction_error_missing_display() {
        let e = ExtractionError::Missing {
            field: "foo",
            selector: ".bar",
        };
        let msg = e.to_string();
        assert!(
            msg.contains("foo"),
            "display must contain field name 'foo'; got: {msg}"
        );
        assert!(
            msg.contains(".bar"),
            "display must contain selector '.bar'; got: {msg}"
        );
    }

    /// `Nested` must render the outer field name and, because its format
    /// string interpolates `{source}`, the inner error's message as well.
    #[test]
    fn extraction_error_nested_display() {
        let inner = ExtractionError::Missing {
            field: "href",
            selector: "a",
        };
        let e = ExtractionError::Nested {
            field: "link",
            source: Box::new(inner),
        };
        let msg = e.to_string();
        assert!(
            msg.contains("link"),
            "display must contain outer field name 'link'; got: {msg}",
        );
        assert!(
            msg.contains("href"),
            "display must contain inner field name 'href'; got: {msg}",
        );
    }

    /// `Nested` must expose its inner failure through the standard
    /// `Error::source` chain so callers can walk the cause list.
    #[test]
    fn extraction_error_nested_exposes_source() {
        let inner = ExtractionError::Missing {
            field: "href",
            selector: "a",
        };
        let e = ExtractionError::Nested {
            field: "link",
            source: Box::new(inner),
        };
        let cause = std::error::Error::source(&e)
            .expect("Nested carries a #[source] field, so source() must be Some");
        let cause_msg = cause.to_string();
        assert!(
            cause_msg.contains("href"),
            "source must be the inner Missing error; got: {cause_msg}",
        );
    }
}