stygian_browser/
session.rs

1//! Session persistence for long-running scraping campaigns.
2//!
//! Save and restore browser state (cookies and localStorage) across runs so
//! you can log in once and reuse the authenticated session without repeating
//! the authentication flow.
6//!
7//! ## Use case
8//!
9//! ```no_run
10//! use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
11//! use stygian_browser::session::{save_session, restore_session, SessionSnapshot};
12//! use std::time::Duration;
13//!
14//! # async fn run() -> stygian_browser::error::Result<()> {
15//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
16//! let handle = pool.acquire().await?;
17//! let mut page = handle.browser().expect("valid browser").new_page().await?;
18//!
19//! // First run: login and save
20//! page.navigate("https://example.com/login", WaitUntil::Selector("body".to_string()), Duration::from_secs(30)).await?;
21//! // …perform login…
22//! let snapshot = save_session(&page).await?;
23//! snapshot.save_to_file("session.json")?;
24//!
25//! // Later run: restore
26//! let snapshot = SessionSnapshot::load_from_file("session.json")?;
27//! restore_session(&page, &snapshot).await?;
28//! // Now the page has the saved cookies + localStorage
29//! # Ok(())
30//! # }
31//! ```
32
33use std::{
34    collections::HashMap,
35    path::Path,
36    time::{Duration, SystemTime, UNIX_EPOCH},
37};
38
39use serde::{Deserialize, Serialize};
40use tracing::{debug, warn};
41
42use crate::{
43    error::{BrowserError, Result},
44    page::PageHandle,
45};
46
47// ─── Cookie ──────────────────────────────────────────────────────────────────
48
/// A serialisable browser cookie.
///
/// Plain-data record built by [`save_session`] from the cookies returned by
/// the page, and stored in [`SessionSnapshot::cookies`]. It derives
/// `Serialize`/`Deserialize` so it can be written to and read from the JSON
/// session file.
///
/// NOTE(review): [`restore_session`] as written only sends `name`, `value`,
/// `domain`, `path`, `http_only` and `secure` back to the browser; `expires`
/// and `same_site` are captured but not replayed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionCookie {
    /// Cookie name.
    pub name: String,
    /// Cookie value.
    pub value: String,
    /// Domain (e.g. `.example.com`).
    pub domain: String,
    /// URL path (e.g. `/`).
    pub path: String,
    /// Expiry as Unix timestamp seconds (`-1` = session cookie).
    ///
    /// Copied unchanged from the browser-reported cookie when the session is
    /// saved.
    pub expires: f64,
    /// HTTP-only flag.
    pub http_only: bool,
    /// Secure flag.
    pub secure: bool,
    /// `SameSite` attribute (`"Strict"`, `"Lax"`, `"None"`, or empty).
    ///
    /// [`save_session`] fills this with the `Debug` rendering of the
    /// browser-reported value, or with an empty string when the cookie has no
    /// `SameSite` attribute.
    pub same_site: String,
}
69
70// ─── Snapshot ────────────────────────────────────────────────────────────────
71
/// A point-in-time snapshot of a browser session.
///
/// Contains cookies and localStorage entries that are sufficient to resume
/// most authenticated sessions.
///
/// Snapshots are produced by [`save_session`], can be persisted with
/// [`SessionSnapshot::save_to_file`] / [`SessionSnapshot::load_from_file`],
/// and are applied to a page with [`restore_session`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionSnapshot {
    /// Origin URL the session was captured from (e.g. `"https://example.com"`).
    ///
    /// Best-effort: [`save_session`] stores an empty string when the origin
    /// cannot be read from the page. The value is only used for logging when
    /// the snapshot is restored.
    pub origin: String,
    /// Saved cookies.
    pub cookies: Vec<SessionCookie>,
    /// localStorage key-value pairs captured from the page.
    pub local_storage: HashMap<String, String>,
    /// Unix timestamp (seconds) when this snapshot was captured.
    ///
    /// Taken from the local system clock at capture time; it is the reference
    /// point for [`SessionSnapshot::age`] and [`SessionSnapshot::is_expired`].
    pub captured_at: u64,
    /// Approximate TTL for auto-expiry checks. `None` means never expire.
    ///
    /// [`save_session`] always leaves this as `None`; set it on the returned
    /// snapshot to opt in to expiry checks.
    pub ttl_secs: Option<u64>,
}
89
90impl SessionSnapshot {
91    /// Returns `true` if the snapshot has exceeded its TTL.
92    ///
93    /// Always returns `false` when no TTL is set.
94    pub fn is_expired(&self) -> bool {
95        let Some(ttl) = self.ttl_secs else {
96            return false;
97        };
98        let now = SystemTime::now()
99            .duration_since(UNIX_EPOCH)
100            .unwrap_or(Duration::ZERO)
101            .as_secs();
102        now.saturating_sub(self.captured_at) > ttl
103    }
104
105    /// Age of the snapshot.
106    pub fn age(&self) -> Duration {
107        let now = SystemTime::now()
108            .duration_since(UNIX_EPOCH)
109            .unwrap_or(Duration::ZERO)
110            .as_secs();
111        Duration::from_secs(now.saturating_sub(self.captured_at))
112    }
113
114    /// Serialise to a JSON file.
115    ///
116    /// # Errors
117    ///
118    /// Returns an IO or serialisation error if the file cannot be written.
119    pub fn save_to_file(&self, path: impl AsRef<Path>) -> Result<()> {
120        let json = serde_json::to_string_pretty(self)
121            .map_err(|e| BrowserError::ConfigError(format!("Failed to serialise session: {e}")))?;
122        std::fs::write(path, json).map_err(BrowserError::Io)
123    }
124
125    /// Deserialise from a JSON file previously written by [`Self::save_to_file`].
126    ///
127    /// # Errors
128    ///
129    /// Returns an IO or deserialisation error if the file cannot be read.
130    pub fn load_from_file(path: impl AsRef<Path>) -> Result<Self> {
131        let json = std::fs::read_to_string(path).map_err(BrowserError::Io)?;
132        serde_json::from_str(&json)
133            .map_err(|e| BrowserError::ConfigError(format!("Failed to deserialise session: {e}")))
134    }
135}
136
137// ─── Save ─────────────────────────────────────────────────────────────────────
138
139/// Capture the current session state from `page`.
140///
141/// Saves all cookies visible to the page's origin and the full `localStorage`
142/// contents.
143///
144/// # Errors
145///
146/// Returns a CDP error if the cookie fetch or localStorage eval fails.
147pub async fn save_session(page: &PageHandle) -> Result<SessionSnapshot> {
148    let cdp_cookies = page.save_cookies().await?;
149
150    let cookies: Vec<SessionCookie> = cdp_cookies
151        .iter()
152        .map(|c| SessionCookie {
153            name: c.name.clone(),
154            value: c.value.clone(),
155            domain: c.domain.clone(),
156            path: c.path.clone(),
157            expires: c.expires,
158            http_only: c.http_only,
159            secure: c.secure,
160            same_site: c
161                .same_site
162                .as_ref()
163                .map(|s| format!("{s:?}"))
164                .unwrap_or_default(),
165        })
166        .collect();
167
168    // Capture localStorage via JS
169    let local_storage: HashMap<String, String> = capture_local_storage(page).await?;
170
171    // Best-effort origin from current URL
172    let origin = page
173        .eval::<String>("window.location.origin")
174        .await
175        .unwrap_or_default();
176
177    let captured_at = SystemTime::now()
178        .duration_since(UNIX_EPOCH)
179        .unwrap_or(Duration::ZERO)
180        .as_secs();
181
182    debug!(
183        origin = %origin,
184        cookie_count = cookies.len(),
185        ls_keys = local_storage.len(),
186        "Session snapshot captured"
187    );
188
189    Ok(SessionSnapshot {
190        origin,
191        cookies,
192        local_storage,
193        captured_at,
194        ttl_secs: None,
195    })
196}
197
198// ─── Restore ──────────────────────────────────────────────────────────────────
199
200/// Restore a previously saved session into `page`.
201///
202/// Imports all cookies via `Network.setCookie` and injects the localStorage
203/// entries via JavaScript.
204///
205/// # Errors
206///
207/// Returns a CDP error if cookie injection or the localStorage script fails.
208pub async fn restore_session(page: &PageHandle, snapshot: &SessionSnapshot) -> Result<()> {
209    use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
210
211    if snapshot.is_expired() {
212        warn!(
213            age_secs = snapshot.age().as_secs(),
214            "Restoring an expired session snapshot"
215        );
216    }
217
218    // Inject cookies
219    for cookie in &snapshot.cookies {
220        let params = match SetCookieParams::builder()
221            .name(cookie.name.clone())
222            .value(cookie.value.clone())
223            .domain(cookie.domain.clone())
224            .path(cookie.path.clone())
225            .http_only(cookie.http_only)
226            .secure(cookie.secure)
227            .build()
228        {
229            Ok(p) => p,
230            Err(e) => {
231                warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
232                continue;
233            }
234        };
235
236        if let Err(e) = page.inner().execute(params).await {
237            warn!(
238                cookie = %cookie.name,
239                error = %e,
240                "Failed to restore cookie"
241            );
242        }
243    }
244
245    // Inject localStorage via JS
246    if !snapshot.local_storage.is_empty() {
247        let entries: Vec<String> = snapshot
248            .local_storage
249            .iter()
250            .map(|(k, v)| {
251                let k_esc = k.replace('\'', "\\'");
252                let v_esc = v.replace('\'', "\\'");
253                format!("localStorage.setItem('{k_esc}', '{v_esc}');")
254            })
255            .collect();
256
257        let script = entries.join("\n");
258
259        let _: serde_json::Value = page.eval(&script).await.unwrap_or(serde_json::Value::Null);
260    }
261
262    debug!(
263        origin = %snapshot.origin,
264        cookie_count = snapshot.cookies.len(),
265        ls_keys = snapshot.local_storage.len(),
266        "Session restored"
267    );
268
269    Ok(())
270}
271
272// ─── Helpers ──────────────────────────────────────────────────────────────────
273
274/// Evaluate `localStorage` and return all key-value pairs.
275async fn capture_local_storage(page: &PageHandle) -> Result<HashMap<String, String>> {
276    // JS: iterate localStorage and return {key: value, ...}
277    let script = r"
278        (function() {
279            var out = {};
280            for (var i = 0; i < localStorage.length; i++) {
281                var k = localStorage.key(i);
282                out[k] = localStorage.getItem(k);
283            }
284            return JSON.stringify(out);
285        })()
286    ";
287
288    match page.eval::<String>(script).await {
289        Ok(json_str) => serde_json::from_str(&json_str).map_err(|e| {
290            BrowserError::ConfigError(format!("Failed to parse localStorage JSON: {e}"))
291        }),
292        Err(e) => {
293            warn!("localStorage capture failed (non-HTML page?): {e}");
294            Ok(HashMap::new())
295        }
296    }
297}
298
299// ─── Tests ───────────────────────────────────────────────────────────────────
300
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    /// Current Unix time in whole seconds (`0` if the clock is before the epoch).
    fn unix_now() -> u64 {
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs()
    }

    /// Build an empty snapshot with the given capture time and TTL.
    fn make_snapshot(captured_at: u64, ttl_secs: Option<u64>) -> SessionSnapshot {
        SessionSnapshot {
            origin: "https://example.com".to_string(),
            cookies: vec![],
            local_storage: HashMap::new(),
            captured_at,
            ttl_secs,
        }
    }

    #[test]
    fn snapshot_not_expired_without_ttl() {
        let s = make_snapshot(0, None);
        assert!(!s.is_expired());
    }

    #[test]
    fn snapshot_expired_when_past_ttl() {
        // captured 1000s ago, ttl = 100s → expired
        let s = make_snapshot(unix_now() - 1000, Some(100));
        assert!(s.is_expired());
    }

    #[test]
    fn snapshot_not_expired_within_ttl() {
        let s = make_snapshot(unix_now() - 10, Some(3600));
        assert!(!s.is_expired());
    }

    #[test]
    fn snapshot_age_is_reasonable() {
        let s = make_snapshot(unix_now() - 60, None);
        let age = s.age();
        assert!(
            age >= Duration::from_secs(59),
            "age should be ≥59s, got {age:?}"
        );
        assert!(
            age < Duration::from_secs(65),
            "age should be <65s, got {age:?}"
        );
    }

    #[test]
    fn snapshot_roundtrips_json() -> std::result::Result<(), Box<dyn std::error::Error>> {
        let mut s = make_snapshot(1_700_000_000, Some(7200));
        s.cookies.push(SessionCookie {
            name: "session_id".to_string(),
            value: "abc123".to_string(),
            domain: "example.com".to_string(),
            path: "/".to_string(),
            expires: -1.0,
            http_only: true,
            secure: true,
            same_site: "Lax".to_string(),
        });
        s.local_storage
            .insert("theme".to_string(), "dark".to_string());

        let json = serde_json::to_string(&s)?;
        let decoded: SessionSnapshot = serde_json::from_str(&json)?;

        assert_eq!(decoded.cookies.len(), 1);
        if let Some(c) = decoded.cookies.first() {
            assert_eq!(c.name, "session_id");
        }
        assert_eq!(
            decoded.local_storage.get("theme").map(String::as_str),
            Some("dark")
        );
        assert_eq!(decoded.ttl_secs, Some(7200));
        Ok(())
    }

    #[test]
    fn snapshot_file_roundtrip() -> std::result::Result<(), Box<dyn std::error::Error>> {
        let s = make_snapshot(0, Some(3600));

        // Include the process id in the file name so concurrent test runs
        // sharing the same temp directory do not overwrite each other's file.
        let path = std::env::temp_dir().join(format!(
            "stygian_session_test_{}.json",
            std::process::id()
        ));

        s.save_to_file(&path)?;
        let loaded = SessionSnapshot::load_from_file(&path);

        // Remove the file before checking the result so that a failed load or
        // assertion does not leave it behind.
        let _ = std::fs::remove_file(&path);

        let loaded = loaded?;
        assert_eq!(loaded.origin, s.origin);
        assert_eq!(loaded.captured_at, s.captured_at);
        assert_eq!(loaded.ttl_secs, s.ttl_secs);
        Ok(())
    }
}
403}