stygian_graph/adapters/
rest_api.rs

1//! REST API scraping adapter with authentication and pagination support.
2//!
3//! Implements [`crate::ports::ScrapingService`] for structured REST JSON APIs. Supports:
4//!
5//! - HTTP methods: `GET`, `POST`, `PUT`, `PATCH`, `DELETE`, `HEAD`
6//! - Authentication: Bearer token, HTTP Basic, API key (header or query param)
7//! - Automatic pagination: offset/page, cursor, or RFC 8288 `Link` header
8//! - JSON response data extraction via dot-separated path
9//! - Custom request headers and query string parameters
10//! - Configurable retries with exponential backoff
11//!
12//! All per-request options live in `ServiceInput::params`; see the
13//! `RestApiAdapter::execute` docs for the full contract.
14//!
15//! # Example
16//!
17//! ```no_run
18//! use stygian_graph::adapters::rest_api::{RestApiAdapter, RestApiConfig};
19//! use stygian_graph::ports::{ScrapingService, ServiceInput};
20//! use serde_json::json;
21//! use std::time::Duration;
22//!
23//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
24//! let adapter = RestApiAdapter::with_config(RestApiConfig {
25//!     timeout:      Duration::from_secs(20),
26//!     max_retries:  2,
27//!     ..Default::default()
28//! });
29//!
30//! let input = ServiceInput {
31//!     url: "https://api.github.com/repos/rust-lang/rust/issues".to_string(),
32//!     params: json!({
33//!         "auth": { "type": "bearer", "token": "ghp_..." },
34//!         "query": { "state": "open", "per_page": "30" },
35//!         "pagination": { "strategy": "link_header", "max_pages": 5 },
36//!         "response": { "data_path": "" }
37//!     }),
38//! };
39//! // let output = adapter.execute(input).await.unwrap();
40//! # });
41//! ```
42
43use std::collections::HashMap;
44use std::time::Duration;
45
46use async_trait::async_trait;
47use reqwest::{Client, Method, Proxy, header};
48use serde_json::{Value, json};
49use tracing::{debug, info, warn};
50
51use crate::domain::error::{Result, ServiceError, StygianError};
52use crate::ports::{ScrapingService, ServiceInput, ServiceOutput};
53
54// ─── Config ───────────────────────────────────────────────────────────────────
55
/// Adapter-level configuration for [`RestApiAdapter`].
///
/// These values are fixed when the adapter is built; everything that varies per
/// request (method, auth, pagination, …) is read from `ServiceInput.params`.
///
/// # Example
///
/// ```
/// use stygian_graph::adapters::rest_api::RestApiConfig;
/// use std::time::Duration;
///
/// let cfg = RestApiConfig {
///     timeout:          Duration::from_secs(20),
///     max_retries:      2,
///     retry_base_delay: Duration::from_millis(500),
///     proxy_url:        None,
/// };
/// ```
#[derive(Debug, Clone)]
pub struct RestApiConfig {
    /// Timeout handed to the HTTP client builder (default: 30 s).
    pub timeout: Duration,
    /// Maximum number of retries per page request after a retryable failure
    /// (default: 3). A value of `n` allows up to `n + 1` attempts in total.
    pub max_retries: u32,
    /// Base delay for exponential backoff; it is doubled for each further retry
    /// (default: 1 s).
    pub retry_base_delay: Duration,
    /// Optional HTTP/HTTPS/SOCKS5 proxy URL, applied to all traffic of the client.
    pub proxy_url: Option<String>,
}
84
85impl Default for RestApiConfig {
86    fn default() -> Self {
87        Self {
88            timeout: Duration::from_secs(30),
89            max_retries: 3,
90            retry_base_delay: Duration::from_secs(1),
91            proxy_url: None,
92        }
93    }
94}
95
96// ─── Internal request model ───────────────────────────────────────────────────
97
/// Authentication scheme, parsed from `params.auth`.
///
/// Header-based variants are applied to the request headers; `ApiKeyQuery` is
/// merged into the query string instead.
#[derive(Debug, Clone)]
enum AuthScheme {
    /// No authentication.
    None,
    /// `Authorization: Bearer <token>`
    Bearer(String),
    /// HTTP Basic authentication.
    Basic { username: String, password: String },
    /// Arbitrary header: `<header>: <key>`
    ApiKeyHeader { header: String, key: String },
    /// Append `?<param>=<key>` to the query string.
    ApiKeyQuery { param: String, key: String },
}
112
/// Request body variant, chosen from `params.body_raw` / `params.body`.
#[derive(Debug, Clone)]
enum RequestBody {
    /// Structured JSON value, sent via the request builder's JSON encoder.
    Json(Value),
    /// Pre-serialised body, sent verbatim.
    Raw(String),
}
119
/// How to advance to the next page.
#[derive(Debug, Clone)]
enum PaginationStrategy {
    /// Single request — no pagination.
    None,
    /// Increment a page/offset query parameter.
    Offset {
        /// Query parameter that carries the page number.
        page_param: String,
        /// Query parameter that carries the page size; only sent when
        /// `page_size` is also set.
        page_size_param: Option<String>,
        /// Page size value sent under `page_size_param`.
        page_size: Option<u64>,
        /// Page number for the next request; incremented by one after each page.
        current_page: u64,
    },
    /// Follow a cursor embedded in the response JSON.
    Cursor {
        /// Query parameter name that carries the cursor on subsequent requests.
        cursor_param: String,
        /// Dot-separated path into the response JSON where the next cursor lives.
        cursor_field: String,
    },
    /// Follow RFC 8288 `Link: <URL>; rel="next"` response header.
    LinkHeader,
}
142
/// Fully-parsed per-request specification, derived from `ServiceInput.params`.
#[derive(Debug, Clone)]
struct RequestSpec {
    /// HTTP method for every page request.
    method: Method,
    /// Caller-supplied headers, added after the `Accept` and auth headers.
    extra_headers: HashMap<String, String>,
    /// Static query parameters merged into each request's query string.
    query_params: HashMap<String, String>,
    /// Optional request body (`None` sends no body).
    body: Option<RequestBody>,
    /// Authentication applied to each request.
    auth: AuthScheme,
    /// Value of the `Accept` header.
    accept: String,
    /// Dot-separated path into the JSON response to extract as data.
    /// `None` means use the full response body.
    data_path: Option<String>,
    /// Return paged data as a flat JSON array even when only one page was fetched.
    collect_as_array: bool,
    /// Strategy used to move from one page to the next.
    pagination: PaginationStrategy,
    /// Upper bound on the number of pages fetched by one `execute` call.
    max_pages: usize,
}
160
161// ─── Adapter ──────────────────────────────────────────────────────────────────
162
/// REST API scraping adapter.
///
/// Thread-safe and cheaply cloneable — the inner `reqwest::Client` uses `Arc`
/// internally. Build once, share across tasks.
///
/// # Example
///
/// ```
/// use stygian_graph::adapters::rest_api::RestApiAdapter;
///
/// let adapter = RestApiAdapter::new();
/// ```
#[derive(Clone)]
pub struct RestApiAdapter {
    /// HTTP client built once in `with_config` and reused for every request.
    client: Client,
    /// Adapter-level settings; the retry loop reads `max_retries` and
    /// `retry_base_delay` from here.
    config: RestApiConfig,
}
180
181impl RestApiAdapter {
182    /// Create a new adapter with default configuration.
183    ///
184    /// # Example
185    ///
186    /// ```
187    /// use stygian_graph::adapters::rest_api::RestApiAdapter;
188    /// let adapter = RestApiAdapter::new();
189    /// ```
190    pub fn new() -> Self {
191        Self::with_config(RestApiConfig::default())
192    }
193
194    /// Create an adapter with custom configuration.
195    ///
196    /// # Panics
197    ///
198    /// Panics only if TLS is unavailable on the host (extremely rare).
199    ///
200    /// # Example
201    ///
202    /// ```
203    /// use stygian_graph::adapters::rest_api::{RestApiAdapter, RestApiConfig};
204    /// use std::time::Duration;
205    ///
206    /// let adapter = RestApiAdapter::with_config(RestApiConfig {
207    ///     timeout: Duration::from_secs(10),
208    ///     ..Default::default()
209    /// });
210    /// ```
211    pub fn with_config(config: RestApiConfig) -> Self {
212        let mut builder = Client::builder()
213            .timeout(config.timeout)
214            .gzip(true)
215            .brotli(true)
216            .use_rustls_tls();
217
218        if let Some(ref proxy_url) = config.proxy_url
219            && let Ok(proxy) = Proxy::all(proxy_url)
220        {
221            builder = builder.proxy(proxy);
222        }
223
224        // SAFETY: TLS via rustls is always available; build() can only fail if the
225        // TLS backend is completely absent, which cannot happen with use_rustls_tls().
226        #[allow(clippy::expect_used)]
227        let client = builder.build().expect("TLS backend unavailable");
228
229        Self { client, config }
230    }
231
232    /// Resolve a dot-separated path into a JSON [`Value`].
233    ///
234    /// Returns `None` if any path segment is missing.
235    ///
236    /// # Example
237    ///
238    /// ```
239    /// use serde_json::json;
240    /// use stygian_graph::adapters::rest_api::RestApiAdapter;
241    ///
242    /// let v = json!({"meta": {"next": "abc123"}});
243    /// assert_eq!(
244    ///     RestApiAdapter::extract_path(&v, "meta.next"),
245    ///     Some(&json!("abc123"))
246    /// );
247    /// assert!(RestApiAdapter::extract_path(&v, "meta.gone").is_none());
248    /// ```
249    pub fn extract_path<'a>(value: &'a Value, path: &str) -> Option<&'a Value> {
250        let mut current = value;
251        for segment in path.split('.') {
252            current = current.get(segment)?;
253        }
254        Some(current)
255    }
256
257    /// Parse an RFC 8288 `Link` header and return the `rel="next"` URL, if any.
258    ///
259    /// # Example
260    ///
261    /// ```
262    /// use stygian_graph::adapters::rest_api::RestApiAdapter;
263    ///
264    /// let link = r#"<https://api.example.com/items?page=2>; rel="next", <https://api.example.com/items?page=1>; rel="prev""#;
265    /// assert_eq!(
266    ///     RestApiAdapter::parse_link_next(link),
267    ///     Some("https://api.example.com/items?page=2".to_owned())
268    /// );
269    /// ```
270    pub fn parse_link_next(link_header: &str) -> Option<String> {
271        for part in link_header.split(',') {
272            let part = part.trim();
273            let mut url: Option<String> = None;
274            let mut is_next = false;
275            for segment in part.split(';') {
276                let segment = segment.trim();
277                if segment.starts_with('<') && segment.ends_with('>') {
278                    url = Some(segment[1..segment.len() - 1].to_owned());
279                } else if segment.trim_start_matches("rel=").trim_matches('"') == "next" {
280                    is_next = true;
281                }
282            }
283            if is_next {
284                return url;
285            }
286        }
287        None
288    }
289
290    /// Parse `ServiceInput.params` into a `RequestSpec`.
291    #[allow(clippy::indexing_slicing)]
292    fn parse_spec(params: &Value) -> Result<RequestSpec> {
293        let method_str = params["method"].as_str().unwrap_or("GET").to_uppercase();
294        let method = match method_str.as_str() {
295            "GET" => Method::GET,
296            "POST" => Method::POST,
297            "PUT" => Method::PUT,
298            "PATCH" => Method::PATCH,
299            "DELETE" => Method::DELETE,
300            "HEAD" => Method::HEAD,
301            other => {
302                return Err(StygianError::from(ServiceError::Unavailable(format!(
303                    "unknown HTTP method: {other}"
304                ))));
305            }
306        };
307
308        let extra_headers = params["headers"]
309            .as_object()
310            .map(|obj| {
311                obj.iter()
312                    .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_owned())))
313                    .collect()
314            })
315            .unwrap_or_default();
316
317        let query_params = params["query"]
318            .as_object()
319            .map(|obj| {
320                obj.iter()
321                    .filter_map(|(k, v)| {
322                        let s = if v.is_string() {
323                            v.as_str().map(ToOwned::to_owned)
324                        } else {
325                            Some(v.to_string())
326                        };
327                        s.map(|val| (k.clone(), val))
328                    })
329                    .collect()
330            })
331            .unwrap_or_default();
332
333        // body_raw takes precedence over body (raw string vs structured JSON).
334        let body = match params["body_raw"].as_str().filter(|s| !s.is_empty()) {
335            Some(raw) => Some(RequestBody::Raw(raw.to_owned())),
336            None if !params["body"].is_null() => Some(RequestBody::Json(params["body"].clone())),
337            None => None,
338        };
339
340        let accept = params["accept"]
341            .as_str()
342            .unwrap_or("application/json")
343            .to_owned();
344
345        let auth = Self::parse_auth(&params["auth"]);
346
347        let data_path = match params["response"]["data_path"].as_str() {
348            Some("") | None => None,
349            Some(p) => Some(p.to_owned()),
350        };
351        let collect_as_array = params["response"]["collect_as_array"]
352            .as_bool()
353            .unwrap_or(false);
354
355        let max_pages = params["pagination"]["max_pages"]
356            .as_u64()
357            .map_or(1, |n| usize::try_from(n).unwrap_or(usize::MAX));
358
359        let pagination = Self::parse_pagination(&params["pagination"]);
360
361        Ok(RequestSpec {
362            method,
363            extra_headers,
364            query_params,
365            body,
366            auth,
367            accept,
368            data_path,
369            collect_as_array,
370            pagination,
371            max_pages,
372        })
373    }
374
375    /// Parse `params.auth` into an [`AuthScheme`].
376    #[allow(clippy::indexing_slicing)]
377    fn parse_auth(auth: &Value) -> AuthScheme {
378        match auth["type"].as_str().unwrap_or("none") {
379            "bearer" | "oauth2" => auth["token"]
380                .as_str()
381                .map_or(AuthScheme::None, |t| AuthScheme::Bearer(t.to_owned())),
382            "basic" => AuthScheme::Basic {
383                username: auth["username"].as_str().unwrap_or("").to_owned(),
384                password: auth["password"].as_str().unwrap_or("").to_owned(),
385            },
386            "api_key_header" => AuthScheme::ApiKeyHeader {
387                header: auth["header"].as_str().unwrap_or("X-Api-Key").to_owned(),
388                key: auth["key"].as_str().unwrap_or("").to_owned(),
389            },
390            "api_key_query" => AuthScheme::ApiKeyQuery {
391                param: auth["param"].as_str().unwrap_or("api_key").to_owned(),
392                key: auth["key"].as_str().unwrap_or("").to_owned(),
393            },
394            _ => AuthScheme::None,
395        }
396    }
397
398    /// Parse `params.pagination` into a [`PaginationStrategy`].
399    #[allow(clippy::indexing_slicing)]
400    fn parse_pagination(pag: &Value) -> PaginationStrategy {
401        match pag["strategy"].as_str().unwrap_or("none") {
402            "offset" => PaginationStrategy::Offset {
403                page_param: pag["page_param"].as_str().unwrap_or("page").to_owned(),
404                page_size_param: pag["page_size_param"].as_str().map(ToOwned::to_owned),
405                page_size: pag["page_size"].as_u64(),
406                current_page: pag["start_page"].as_u64().unwrap_or(1),
407            },
408            "cursor" => PaginationStrategy::Cursor {
409                cursor_param: pag["cursor_param"].as_str().unwrap_or("cursor").to_owned(),
410                cursor_field: pag["cursor_field"]
411                    .as_str()
412                    .unwrap_or("next_cursor")
413                    .to_owned(),
414            },
415            "link_header" => PaginationStrategy::LinkHeader,
416            _ => PaginationStrategy::None,
417        }
418    }
419
420    /// Extract the data portion of a parsed response using `spec.data_path`.
421    fn extract_data(response: &Value, spec: &RequestSpec) -> Value {
422        spec.data_path
423            .as_deref()
424            .and_then(|path| Self::extract_path(response, path))
425            .cloned()
426            .unwrap_or_else(|| response.clone())
427    }
428
429    /// Execute a single HTTP request, retrying on transient failures.
430    async fn send_one(
431        &self,
432        url: &str,
433        spec: &RequestSpec,
434        extra_query: &HashMap<String, String>,
435    ) -> Result<(Value, Option<String>)> {
436        let mut last_err: Option<StygianError> = None;
437
438        for attempt in 0..=self.config.max_retries {
439            if attempt > 0 {
440                // Honour server Retry-After when available; otherwise exponential backoff.
441                let delay = match &last_err {
442                    Some(StygianError::Service(ServiceError::RateLimited { retry_after_ms })) => {
443                        Duration::from_millis(*retry_after_ms)
444                    }
445                    _ => self.config.retry_base_delay * 2u32.saturating_pow(attempt - 1),
446                };
447                tokio::time::sleep(delay).await;
448                debug!(url, attempt, ?delay, "REST API retry");
449            }
450
451            match self.do_send(url, spec, extra_query).await {
452                Ok(r) => return Ok(r),
453                Err(e) if is_retryable(&e) && attempt < self.config.max_retries => {
454                    last_err = Some(e);
455                }
456                Err(e) => return Err(e),
457            }
458        }
459
460        Err(last_err.unwrap_or_else(|| {
461            StygianError::from(ServiceError::Unavailable("max retries exceeded".into()))
462        }))
463    }
464
465    /// Perform exactly one HTTP round-trip (no retry).
466    ///
467    /// Returns the parsed JSON response body and the raw `Link` header value (if present).
468    async fn do_send(
469        &self,
470        url: &str,
471        spec: &RequestSpec,
472        extra_query: &HashMap<String, String>,
473    ) -> Result<(Value, Option<String>)> {
474        let mut req = self.client.request(spec.method.clone(), url);
475
476        // Accept header
477        req = req.header(header::ACCEPT, spec.accept.as_str());
478
479        // Auth — header-based schemes
480        req = match &spec.auth {
481            AuthScheme::Bearer(token) => req.bearer_auth(token),
482            AuthScheme::Basic { username, password } => req.basic_auth(username, Some(password)),
483            AuthScheme::ApiKeyHeader { header: hdr, key } => req.header(hdr.as_str(), key.as_str()),
484            AuthScheme::ApiKeyQuery { .. } | AuthScheme::None => req,
485        };
486
487        // Custom headers
488        for (k, v) in &spec.extra_headers {
489            req = req.header(k.as_str(), v.as_str());
490        }
491
492        // Merge query params: static + per-page extra + API key query (if applicable)
493        let mut merged: HashMap<String, String> = spec.query_params.clone();
494        merged.extend(extra_query.iter().map(|(k, v)| (k.clone(), v.clone())));
495        if let AuthScheme::ApiKeyQuery { param, key } = &spec.auth {
496            merged.insert(param.clone(), key.clone());
497        }
498        if !merged.is_empty() {
499            let pairs: Vec<(&String, &String)> = merged.iter().collect();
500            req = req.query(&pairs);
501        }
502
503        // Body
504        req = match &spec.body {
505            Some(RequestBody::Json(v)) => req.json(v),
506            Some(RequestBody::Raw(s)) => req.body(s.clone()),
507            None => req,
508        };
509
510        let response = req
511            .send()
512            .await
513            .map_err(|e| StygianError::from(ServiceError::Unavailable(e.to_string())))?;
514
515        let status = response.status();
516
517        // Capture Link header before consuming the response
518        let link_header = response
519            .headers()
520            .get("link")
521            .and_then(|v| v.to_str().ok())
522            .map(ToOwned::to_owned);
523
524        // 429 — honour server Retry-After hint via dedicated error variant.
525        if status.as_u16() == 429 {
526            let retry_after_secs = response
527                .headers()
528                .get("retry-after")
529                .and_then(|v| v.to_str().ok())
530                .and_then(|s| s.parse::<u64>().ok())
531                .unwrap_or(5);
532            warn!(url, retry_after_secs, "REST API rate-limited (429)");
533            return Err(StygianError::from(ServiceError::RateLimited {
534                retry_after_ms: retry_after_secs.saturating_mul(1000),
535            }));
536        }
537
538        if !status.is_success() {
539            let snippet: String = response
540                .text()
541                .await
542                .unwrap_or_default()
543                .chars()
544                .take(200)
545                .collect();
546            return Err(StygianError::from(ServiceError::Unavailable(format!(
547                "HTTP {status}: {snippet}"
548            ))));
549        }
550
551        let body = response
552            .text()
553            .await
554            .map_err(|e| StygianError::from(ServiceError::Unavailable(e.to_string())))?;
555
556        // Parse as JSON when possible; wrap plain text as a JSON string otherwise.
557        let parsed: Value = serde_json::from_str(&body).unwrap_or(Value::String(body));
558
559        Ok((parsed, link_header))
560    }
561}
562
563impl Default for RestApiAdapter {
564    fn default() -> Self {
565        Self::new()
566    }
567}
568
569// ─── Helpers ──────────────────────────────────────────────────────────────────
570
571/// Returns `true` for transient errors that are worth retrying.
572fn is_retryable(err: &StygianError) -> bool {
573    match err {
574        StygianError::Service(ServiceError::RateLimited { .. }) => true,
575        StygianError::Service(ServiceError::Unavailable(msg)) => {
576            msg.contains("429")
577                || msg.contains("500")
578                || msg.contains("502")
579                || msg.contains("503")
580                || msg.contains("504")
581                || msg.contains("connection")
582                || msg.contains("timed out")
583        }
584        _ => false,
585    }
586}
587
588// ─── ScrapingService ──────────────────────────────────────────────────────────
589
590#[async_trait]
591impl ScrapingService for RestApiAdapter {
592    /// Execute one or more REST API requests and return the aggregated result.
593    ///
594    /// # `ServiceInput.url`
595    ///
596    /// Base URL of the REST endpoint (including path; query string is optional).
597    ///
598    /// # `ServiceInput.params` contract
599    ///
600    /// ```json
601    /// {
602    ///   "method":   "GET",
603    ///   "body":     { "key": "value" },
604    ///   "body_raw": "raw body string",
605    ///   "headers":  { "X-Custom-Header": "value" },
606    ///   "query":    { "state": "open", "per_page": "30" },
607    ///   "accept":   "application/json",
608    ///
609    ///   "auth": {
610    ///     "type":     "bearer",
611    ///     "token":    "...",
612    ///     "username": "user",
613    ///     "password": "pass",
614    ///     "header":   "X-Api-Key",
615    ///     "param":    "api_key",
616    ///     "key":      "sk-..."
617    ///   },
618    ///
619    ///   "response": {
620    ///     "data_path":        "items",
621    ///     "collect_as_array": true
622    ///   },
623    ///
624    ///   "pagination": {
625    ///     "strategy":        "link_header",
626    ///     "max_pages":       10,
627    ///     "page_param":      "page",
628    ///     "page_size_param": "per_page",
629    ///     "page_size":       100,
630    ///     "start_page":      1,
631    ///     "cursor_param":    "cursor",
632    ///     "cursor_field":    "meta.next_cursor"
633    ///   }
634    /// }
635    /// ```
636    ///
637    /// # Auth `type` values
638    ///
639    /// | `type` | Required fields | Description |
640    /// | --- | --- | --- |
641    /// | `"bearer"` / `"oauth2"` | `token` | `Authorization: Bearer <token>` |
642    /// | `"basic"` | `username`, `password` | HTTP Basic |
643    /// | `"api_key_header"` | `header`, `key` | Custom header |
644    /// | `"api_key_query"` | `param`, `key` | Query string |
645    /// | `"none"` or absent | — | No auth |
646    ///
647    /// # Pagination strategies
648    ///
649    /// | `strategy` | Description |
650    /// | --- | --- |
651    /// | `"none"` | Single request (default) |
652    /// | `"offset"` | Increment `page_param` from `start_page` |
653    /// | `"cursor"` | Extract next cursor at `cursor_field` in each response; pass it as `cursor_param` |
654    /// | `"link_header"` | Follow RFC 8288 `Link: <url>; rel="next"` header |
655    async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput> {
656        let spec = Self::parse_spec(&input.params)?;
657
658        let mut accumulated: Vec<Value> = Vec::new();
659        let mut page_count: usize = 0;
660        let mut current_url = input.url.clone();
661        let mut pagination = spec.pagination.clone();
662        let mut extra_query: HashMap<String, String> = HashMap::new();
663
664        // Cursor state lives outside the loop so it persists across pages.
665        let mut cursor_state: Option<String> = None;
666
667        info!(url = %input.url, "REST API execute start");
668
669        loop {
670            if page_count >= spec.max_pages {
671                debug!(%current_url, page_count, "REST API: max_pages reached");
672                break;
673            }
674
675            // Build per-page query additions
676            extra_query.clear();
677            match &pagination {
678                PaginationStrategy::Offset {
679                    page_param,
680                    page_size_param,
681                    page_size,
682                    current_page,
683                } => {
684                    extra_query.insert(page_param.clone(), current_page.to_string());
685                    if let (Some(size_param), Some(size)) = (page_size_param, page_size) {
686                        extra_query.insert(size_param.clone(), size.to_string());
687                    }
688                }
689                PaginationStrategy::Cursor { cursor_param, .. } => {
690                    if let Some(ref cursor) = cursor_state {
691                        extra_query.insert(cursor_param.clone(), cursor.clone());
692                    }
693                }
694                PaginationStrategy::None | PaginationStrategy::LinkHeader => {}
695            }
696
697            let (response, link_header) = self.send_one(&current_url, &spec, &extra_query).await?;
698
699            let page_data = Self::extract_data(&response, &spec);
700
701            // Accumulate — empty array responses signal end-of-pagination.
702            match &page_data {
703                Value::Array(items) => {
704                    if items.is_empty() {
705                        debug!("REST API: empty page, stopping pagination");
706                        break;
707                    }
708                    accumulated.extend(items.iter().cloned());
709                }
710                other => {
711                    accumulated.push(other.clone());
712                }
713            }
714            page_count += 1;
715
716            // Advance pagination state
717            let stop = match &mut pagination {
718                PaginationStrategy::None => true,
719                PaginationStrategy::Offset { current_page, .. } => {
720                    *current_page += 1;
721                    false
722                }
723                PaginationStrategy::Cursor { cursor_field, .. } => {
724                    Self::extract_path(&response, cursor_field.as_str())
725                        .and_then(Value::as_str)
726                        .filter(|s| !s.is_empty())
727                        .map(ToOwned::to_owned)
728                        .is_none_or(|cursor| {
729                            cursor_state = Some(cursor);
730                            false
731                        })
732                }
733                PaginationStrategy::LinkHeader => link_header
734                    .as_deref()
735                    .and_then(Self::parse_link_next)
736                    .is_none_or(|next_url| {
737                        current_url = next_url;
738                        false
739                    }),
740            };
741            if stop {
742                break;
743            }
744        }
745
746        // Serialise accumulated results
747        let data_value = if spec.collect_as_array || accumulated.len() > 1 {
748            Value::Array(accumulated)
749        } else {
750            accumulated.into_iter().next().unwrap_or(Value::Null)
751        };
752
753        let data_str = match &data_value {
754            Value::String(s) => s.clone(),
755            other => serde_json::to_string_pretty(other).unwrap_or_default(),
756        };
757
758        let metadata = json!({
759            "url":        input.url,
760            "page_count": page_count,
761        });
762
763        info!(%input.url, page_count, "REST API execute done");
764
765        Ok(ServiceOutput {
766            data: data_str,
767            metadata,
768        })
769    }
770
    /// Identifier of this adapter: always the constant `"rest-api"`.
    fn name(&self) -> &'static str {
        "rest-api"
    }
774}
775
776// ─── Tests ────────────────────────────────────────────────────────────────────
777
778#[cfg(test)]
779#[allow(clippy::unwrap_used, clippy::panic, clippy::indexing_slicing)]
780mod tests {
781    use super::*;
782    use serde_json::json;
783
784    // ── parse_auth ─────────────────────────────────────────────────────────────
785
786    #[test]
787    fn parse_auth_bearer() {
788        let auth = json!({"type": "bearer", "token": "tok123"});
789        match RestApiAdapter::parse_auth(&auth) {
790            AuthScheme::Bearer(t) => assert_eq!(t, "tok123"),
791            other => panic!("unexpected: {other:?}"),
792        }
793    }
794
795    #[test]
796    fn parse_auth_oauth2_alias() {
797        let auth = json!({"type": "oauth2", "token": "oauth_tok"});
798        match RestApiAdapter::parse_auth(&auth) {
799            AuthScheme::Bearer(t) => assert_eq!(t, "oauth_tok"),
800            other => panic!("unexpected: {other:?}"),
801        }
802    }
803
804    #[test]
805    fn parse_auth_basic() {
806        let auth = json!({"type": "basic", "username": "alice", "password": "s3cr3t"});
807        match RestApiAdapter::parse_auth(&auth) {
808            AuthScheme::Basic { username, password } => {
809                assert_eq!(username, "alice");
810                assert_eq!(password, "s3cr3t");
811            }
812            other => panic!("unexpected: {other:?}"),
813        }
814    }
815
816    #[test]
817    fn parse_auth_api_key_header() {
818        let auth = json!({"type": "api_key_header", "header": "X-Token", "key": "k123"});
819        match RestApiAdapter::parse_auth(&auth) {
820            AuthScheme::ApiKeyHeader { header, key } => {
821                assert_eq!(header, "X-Token");
822                assert_eq!(key, "k123");
823            }
824            other => panic!("unexpected: {other:?}"),
825        }
826    }
827
828    #[test]
829    fn parse_auth_api_key_query() {
830        let auth = json!({"type": "api_key_query", "param": "api_key", "key": "qk"});
831        match RestApiAdapter::parse_auth(&auth) {
832            AuthScheme::ApiKeyQuery { param, key } => {
833                assert_eq!(param, "api_key");
834                assert_eq!(key, "qk");
835            }
836            other => panic!("unexpected: {other:?}"),
837        }
838    }
839
840    #[test]
841    fn parse_auth_none_default() {
842        let auth = json!(null);
843        assert!(matches!(
844            RestApiAdapter::parse_auth(&auth),
845            AuthScheme::None
846        ));
847    }
848
849    // ── extract_path ───────────────────────────────────────────────────────────
850
851    #[test]
852    fn extract_path_top_level() {
853        let v = json!({"items": [1, 2, 3]});
854        assert_eq!(
855            RestApiAdapter::extract_path(&v, "items"),
856            Some(&json!([1, 2, 3]))
857        );
858    }
859
860    #[test]
861    fn extract_path_nested() {
862        let v = json!({"meta": {"next_cursor": "abc"}});
863        assert_eq!(
864            RestApiAdapter::extract_path(&v, "meta.next_cursor"),
865            Some(&json!("abc"))
866        );
867    }
868
869    #[test]
870    fn extract_path_missing() {
871        let v = json!({"a": {"b": 1}});
872        assert!(RestApiAdapter::extract_path(&v, "a.c").is_none());
873    }
874
875    // ── parse_link_next ────────────────────────────────────────────────────────
876
877    #[test]
878    fn parse_link_next_present() {
879        let h = r#"<https://api.example.com/items?page=2>; rel="next", <https://api.example.com/items?page=1>; rel="prev""#;
880        assert_eq!(
881            RestApiAdapter::parse_link_next(h),
882            Some("https://api.example.com/items?page=2".to_owned())
883        );
884    }
885
886    #[test]
887    fn parse_link_next_absent() {
888        let h = r#"<https://api.example.com/items?page=1>; rel="prev""#;
889        assert!(RestApiAdapter::parse_link_next(h).is_none());
890    }
891
892    #[test]
893    fn parse_link_next_single() {
894        let h = r#"<https://api.example.com/items?page=3>; rel="next""#;
895        assert_eq!(
896            RestApiAdapter::parse_link_next(h),
897            Some("https://api.example.com/items?page=3".to_owned())
898        );
899    }
900
901    // ── parse_spec ─────────────────────────────────────────────────────────────
902
903    #[test]
904    fn parse_spec_defaults() {
905        let spec = RestApiAdapter::parse_spec(&json!({})).unwrap();
906        assert_eq!(spec.method, Method::GET);
907        assert_eq!(spec.accept, "application/json");
908        assert_eq!(spec.max_pages, 1);
909        assert!(spec.data_path.is_none());
910        assert!(!spec.collect_as_array);
911        assert!(matches!(spec.pagination, PaginationStrategy::None));
912    }
913
914    #[test]
915    fn parse_spec_post_with_body_and_headers() {
916        let params = json!({
917            "method":  "POST",
918            "body":    { "key": "value" },
919            "headers": { "X-Foo": "bar" },
920            "query":   { "limit": "10" }
921        });
922        let spec = RestApiAdapter::parse_spec(&params).unwrap();
923        assert_eq!(spec.method, Method::POST);
924        assert_eq!(spec.extra_headers.get("X-Foo"), Some(&"bar".to_string()));
925        assert_eq!(spec.query_params.get("limit"), Some(&"10".to_string()));
926        assert!(matches!(spec.body, Some(RequestBody::Json(_))));
927    }
928
929    #[test]
930    fn parse_spec_unknown_method_returns_error() {
931        let result = RestApiAdapter::parse_spec(&json!({"method": "BREW"}));
932        assert!(result.is_err());
933    }
934
935    #[test]
936    fn parse_spec_cursor_pagination() {
937        let params = json!({
938            "pagination": {
939                "strategy":     "cursor",
940                "cursor_param": "after",
941                "cursor_field": "page_info.end_cursor",
942                "max_pages":    10
943            }
944        });
945        let spec = RestApiAdapter::parse_spec(&params).unwrap();
946        assert_eq!(spec.max_pages, 10);
947        match spec.pagination {
948            PaginationStrategy::Cursor {
949                cursor_param,
950                cursor_field,
951            } => {
952                assert_eq!(cursor_param, "after");
953                assert_eq!(cursor_field, "page_info.end_cursor");
954            }
955            other => panic!("unexpected: {other:?}"),
956        }
957    }
958
959    #[test]
960    fn parse_spec_offset_pagination() {
961        let params = json!({
962            "pagination": {
963                "strategy":        "offset",
964                "page_param":      "page",
965                "page_size_param": "per_page",
966                "page_size":       50,
967                "start_page":      1,
968                "max_pages":       3
969            }
970        });
971        let spec = RestApiAdapter::parse_spec(&params).unwrap();
972        assert_eq!(spec.max_pages, 3);
973        match spec.pagination {
974            PaginationStrategy::Offset {
975                page_size,
976                current_page,
977                page_param,
978                ..
979            } => {
980                assert_eq!(page_size, Some(50));
981                assert_eq!(current_page, 1);
982                assert_eq!(page_param, "page");
983            }
984            other => panic!("unexpected: {other:?}"),
985        }
986    }
987
988    #[test]
989    fn parse_spec_link_header_pagination() {
990        let params = json!({
991            "pagination": { "strategy": "link_header", "max_pages": 5 }
992        });
993        let spec = RestApiAdapter::parse_spec(&params).unwrap();
994        assert_eq!(spec.max_pages, 5);
995        assert!(matches!(spec.pagination, PaginationStrategy::LinkHeader));
996    }
997
998    #[test]
999    fn parse_spec_data_path_and_collect_as_array() {
1000        let params = json!({
1001            "response": { "data_path": "data.items", "collect_as_array": true }
1002        });
1003        let spec = RestApiAdapter::parse_spec(&params).unwrap();
1004        assert_eq!(spec.data_path, Some("data.items".to_owned()));
1005        assert!(spec.collect_as_array);
1006    }
1007
1008    #[test]
1009    fn parse_spec_empty_data_path_is_none() {
1010        let params = json!({ "response": { "data_path": "" } });
1011        let spec = RestApiAdapter::parse_spec(&params).unwrap();
1012        assert!(spec.data_path.is_none());
1013    }
1014
1015    // ── adapter_name ───────────────────────────────────────────────────────────
1016
1017    #[test]
1018    fn adapter_name() {
1019        assert_eq!(RestApiAdapter::new().name(), "rest-api");
1020    }
1021
1022    // ── is_retryable ────────────────────────────────────────────────────────────
1023
1024    #[test]
1025    fn is_retryable_429() {
1026        let e = StygianError::from(ServiceError::Unavailable(
1027            "HTTP 429 rate-limited".to_string(),
1028        ));
1029        assert!(is_retryable(&e));
1030    }
1031
1032    #[test]
1033    fn is_retryable_503() {
1034        let e = StygianError::from(ServiceError::Unavailable(
1035            "HTTP 503 Service Unavailable".to_string(),
1036        ));
1037        assert!(is_retryable(&e));
1038    }
1039
1040    #[test]
1041    fn is_retryable_404_not_retryable() {
1042        let e = StygianError::from(ServiceError::Unavailable("HTTP 404 Not Found".to_string()));
1043        assert!(!is_retryable(&e));
1044    }
1045
1046    // ── integration ────────────────────────────────────────────────────────────
1047
1048    /// Real HTTP integration test — requires `REST_API_TEST_URL` env var.
1049    ///
1050    /// Run with: `REST_API_TEST_URL=https://httpbin.org/get cargo test -- --ignored`
1051    #[tokio::test]
1052    #[ignore = "requires live REST API endpoint; set REST_API_TEST_URL env var"]
1053    async fn integration_get_httpbin() {
1054        let url = std::env::var("REST_API_TEST_URL")
1055            .unwrap_or_else(|_| "https://httpbin.org/get".to_string());
1056
1057        let adapter = RestApiAdapter::new();
1058        let input = ServiceInput {
1059            url,
1060            params: json!({}),
1061        };
1062        let output = adapter.execute(input).await.unwrap();
1063        assert!(!output.data.is_empty());
1064        assert_eq!(output.metadata["page_count"], 1);
1065    }
1066}