Skip to main content

stygian_extract_derive/
lib.rs

1//! Proc-macro crate for `#[derive(Extract)]`.
2//!
3//! Used by `stygian-browser` behind the `extract` feature flag.
4//! Do not depend on this crate directly — use `stygian_browser::extract::Extract`.
5
6use proc_macro::TokenStream;
7use quote::quote;
8use syn::{
9    Data, DeriveInput, Fields, GenericArgument, PathArguments, Type, TypePath, parse_macro_input,
10};
11
12// ─── SelectorArgs ─────────────────────────────────────────────────────────────
13
/// Parsed contents of a field-level `#[selector(...)]` attribute.
///
/// Accepted spellings are `#[selector("css")]`,
/// `#[selector("css", attr = "name")]` and `#[selector("css", nested)]`.
struct SelectorArgs {
    /// The CSS selector given as the first positional string literal.
    css: String,
    /// Value of the `attr = "..."` option, when one was supplied.
    attr: Option<String>,
    /// `true` when the bare `nested` option was supplied.
    nested: bool,
}
21
22impl syn::parse::Parse for SelectorArgs {
23    fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result<Self> {
24        // First positional argument must be the CSS selector string.
25        let css: syn::LitStr = input.parse()?;
26        let mut attr: Option<String> = None;
27        let mut nested = false;
28
29        while input.peek(syn::Token![,]) {
30            let _: syn::Token![,] = input.parse()?;
31            // Allow a trailing comma with nothing after it.
32            if input.is_empty() {
33                break;
34            }
35            let kw: syn::Ident = input.parse()?;
36            if kw == "attr" {
37                let _: syn::Token![=] = input.parse()?;
38                let s: syn::LitStr = input.parse()?;
39                attr = Some(s.value());
40            } else if kw == "nested" {
41                nested = true;
42            } else {
43                return Err(syn::Error::new_spanned(
44                    kw,
45                    "unknown selector option; expected `attr = \"...\"` or `nested`",
46                ));
47            }
48        }
49
50        Ok(Self {
51            css: css.value(),
52            attr,
53            nested,
54        })
55    }
56}
57
58// ─── Helper: detect Option<T> ─────────────────────────────────────────────────
59
60/// If `ty` is `Option<Inner>`, return `Some(&Inner)`.  Otherwise `None`.
61fn unwrap_option(ty: &Type) -> Option<&Type> {
62    let Type::Path(TypePath { qself: None, path }) = ty else {
63        return None;
64    };
65    let seg = path.segments.last()?;
66    if seg.ident != "Option" {
67        return None;
68    }
69    let PathArguments::AngleBracketed(ref args) = seg.arguments else {
70        return None;
71    };
72    if let Some(GenericArgument::Type(inner)) = args.args.first() {
73        Some(inner)
74    } else {
75        None
76    }
77}
78
79// ─── Per-field code generators ────────────────────────────────────────────────
80
81fn generate_text_extraction(
82    field_name: &syn::Ident,
83    field_name_str: &str,
84    css: &str,
85    is_optional: bool,
86) -> proc_macro2::TokenStream {
87    if is_optional {
88        quote! {
89            let #field_name = {
90                let __children = node.children_matching(#css).await
91                    .map_err(|__e| ::stygian_browser::extract::ExtractionError::CdpFailed {
92                        field: #field_name_str, source: ::std::boxed::Box::new(__e),
93                    })?;
94                match __children.into_iter().next() {
95                    None => None,
96                    Some(ref __node) => Some(__node.text_content().await
97                        .map_err(|__e| ::stygian_browser::extract::ExtractionError::CdpFailed {
98                            field: #field_name_str, source: ::std::boxed::Box::new(__e),
99                        })?),
100                }
101            };
102        }
103    } else {
104        quote! {
105            let #field_name = {
106                let __children = node.children_matching(#css).await
107                    .map_err(|__e| ::stygian_browser::extract::ExtractionError::CdpFailed {
108                        field: #field_name_str, source: ::std::boxed::Box::new(__e),
109                    })?;
110                let __first = __children.into_iter().next().ok_or(
111                    ::stygian_browser::extract::ExtractionError::Missing {
112                        field: #field_name_str, selector: #css,
113                    })?;
114                __first.text_content().await
115                    .map_err(|__e| ::stygian_browser::extract::ExtractionError::CdpFailed {
116                        field: #field_name_str, source: ::std::boxed::Box::new(__e),
117                    })?
118            };
119        }
120    }
121}
122
123fn generate_attr_extraction(
124    field_name: &syn::Ident,
125    field_name_str: &str,
126    css: &str,
127    attr: &str,
128    is_optional: bool,
129) -> proc_macro2::TokenStream {
130    if is_optional {
131        quote! {
132            let #field_name = {
133                let __children = node.children_matching(#css).await
134                    .map_err(|__e| ::stygian_browser::extract::ExtractionError::CdpFailed {
135                        field: #field_name_str, source: ::std::boxed::Box::new(__e),
136                    })?;
137                match __children.into_iter().next() {
138                    None => None,
139                    Some(ref __node) => __node.attr(#attr).await
140                        .map_err(|__e| ::stygian_browser::extract::ExtractionError::CdpFailed {
141                            field: #field_name_str, source: ::std::boxed::Box::new(__e),
142                        })?,
143                }
144            };
145        }
146    } else {
147        quote! {
148            let #field_name = {
149                let __children = node.children_matching(#css).await
150                    .map_err(|__e| ::stygian_browser::extract::ExtractionError::CdpFailed {
151                        field: #field_name_str, source: ::std::boxed::Box::new(__e),
152                    })?;
153                let __first = __children.into_iter().next().ok_or(
154                    ::stygian_browser::extract::ExtractionError::Missing {
155                        field: #field_name_str, selector: #css,
156                    })?;
157                __first.attr(#attr).await
158                    .map_err(|__e| ::stygian_browser::extract::ExtractionError::CdpFailed {
159                        field: #field_name_str, source: ::std::boxed::Box::new(__e),
160                    })?
161                    .unwrap_or_default()
162            };
163        }
164    }
165}
166
167fn generate_nested_extraction(
168    field_name: &syn::Ident,
169    field_name_str: &str,
170    css: &str,
171    inner_ty: &Type,
172    is_optional: bool,
173) -> proc_macro2::TokenStream {
174    if is_optional {
175        quote! {
176            let #field_name = {
177                let __children = node.children_matching(#css).await
178                    .map_err(|__e| ::stygian_browser::extract::ExtractionError::CdpFailed {
179                        field: #field_name_str, source: ::std::boxed::Box::new(__e),
180                    })?;
181                match __children.into_iter().next() {
182                    None => None,
183                    Some(ref __node) => Some(
184                        <#inner_ty as ::stygian_browser::extract::Extractable>::extract_from(__node).await
185                            .map_err(|__e| ::stygian_browser::extract::ExtractionError::Nested {
186                                field: #field_name_str, source: ::std::boxed::Box::new(__e),
187                            })?),
188                }
189            };
190        }
191    } else {
192        quote! {
193            let #field_name = {
194                let __children = node.children_matching(#css).await
195                    .map_err(|__e| ::stygian_browser::extract::ExtractionError::CdpFailed {
196                        field: #field_name_str, source: ::std::boxed::Box::new(__e),
197                    })?;
198                let __first = __children.into_iter().next().ok_or(
199                    ::stygian_browser::extract::ExtractionError::Missing {
200                        field: #field_name_str, selector: #css,
201                    })?;
202                <#inner_ty as ::stygian_browser::extract::Extractable>::extract_from(&__first).await
203                    .map_err(|__e| ::stygian_browser::extract::ExtractionError::Nested {
204                        field: #field_name_str, source: ::std::boxed::Box::new(__e),
205                    })?
206            };
207        }
208    }
209}
210
211fn generate_field(field: &syn::Field) -> syn::Result<(proc_macro2::TokenStream, syn::Ident)> {
212    let field_name = field
213        .ident
214        .as_ref()
215        .ok_or_else(|| syn::Error::new_spanned(field, "expected a named field"))?
216        .clone();
217    let field_name_str = field_name.to_string();
218
219    let selector_attr = field
220        .attrs
221        .iter()
222        .find(|a| a.path().is_ident("selector"))
223        .ok_or_else(|| {
224            syn::Error::new_spanned(
225                field,
226                format!("field `{field_name_str}` is missing a #[selector(\"...\")] attribute"),
227            )
228        })?;
229
230    let args: SelectorArgs = selector_attr.parse_args()?;
231    let css = &args.css;
232    let is_optional = unwrap_option(&field.ty).is_some();
233
234    let ts = if args.nested {
235        let inner_ty = unwrap_option(&field.ty).unwrap_or(&field.ty);
236        generate_nested_extraction(&field_name, &field_name_str, css, inner_ty, is_optional)
237    } else if let Some(ref attr) = args.attr {
238        generate_attr_extraction(&field_name, &field_name_str, css, attr, is_optional)
239    } else {
240        generate_text_extraction(&field_name, &field_name_str, css, is_optional)
241    };
242
243    Ok((ts, field_name))
244}
245
246// ─── #[derive(Extract)] ───────────────────────────────────────────────────────
247
248/// Derive `stygian_browser::extract::Extractable` for a struct.
249///
250/// Each field must carry `#[selector("css")]`, `#[selector("css", attr = "name")]`,
251/// or `#[selector("css", nested)]`.  Wrapping the field type in `Option<T>` makes
252/// a missing element produce `None` instead of
253/// `ExtractionError::Missing`.
254#[proc_macro_derive(Extract, attributes(selector))]
255pub fn derive_extract(input: TokenStream) -> TokenStream {
256    let input = parse_macro_input!(input as DeriveInput);
257    match expand(&input) {
258        Ok(ts) => ts.into(),
259        Err(e) => e.to_compile_error().into(),
260    }
261}
262
263fn expand(input: &DeriveInput) -> syn::Result<proc_macro2::TokenStream> {
264    let name = &input.ident;
265
266    // Reject non-struct inputs with a clean compile_error.
267    let Data::Struct(ref data_struct) = input.data else {
268        return Err(syn::Error::new_spanned(
269            &input.ident,
270            "#[derive(Extract)] can only be applied to structs",
271        ));
272    };
273
274    // Reject tuple / unit structs.
275    let Fields::Named(ref named_fields) = data_struct.fields else {
276        return Err(syn::Error::new_spanned(
277            &input.ident,
278            "#[derive(Extract)] requires a struct with named fields",
279        ));
280    };
281
282    let pairs: Vec<_> = named_fields
283        .named
284        .iter()
285        .map(generate_field)
286        .collect::<syn::Result<_>>()?;
287
288    let (field_assignments, field_idents): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
289    let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl();
290
291    Ok(quote! {
292        impl #impl_generics ::stygian_browser::extract::Extractable for #name #ty_generics
293        #where_clause
294        {
295            async fn extract_from(
296                node: &::stygian_browser::page::NodeHandle,
297            ) -> ::std::result::Result<Self, ::stygian_browser::extract::ExtractionError> {
298                #(#field_assignments)*
299                Ok(Self { #(#field_idents),* })
300            }
301        }
302    })
303}