Skip to main content

stygian_browser/
session.rs

1//! Session persistence for long-running scraping campaigns.
2//!
3//! Save and restore browser state (cookies and localStorage) across runs so
4//! you can log in once and reuse the authenticated session without repeating
5//! the authentication flow.
6//!
7//! ## Use case
8//!
9//! ```no_run
10//! use stygian_browser::{BrowserPool, BrowserConfig, WaitUntil};
11//! use stygian_browser::session::{save_session, restore_session, SessionSnapshot};
12//! use std::time::Duration;
13//!
14//! # async fn run() -> stygian_browser::error::Result<()> {
15//! let pool = BrowserPool::new(BrowserConfig::default()).await?;
16//! let handle = pool.acquire().await?;
17//! let mut page = handle.browser().expect("valid browser").new_page().await?;
18//!
19//! // First run: log in and save
20//! page.navigate("https://example.com/login", WaitUntil::Selector("body".to_string()), Duration::from_secs(30)).await?;
21//! // …perform login…
22//! let snapshot = save_session(&page).await?;
23//! snapshot.save_to_file("session.json")?;
24//!
25//! // Later run: restore
26//! let snapshot = SessionSnapshot::load_from_file("session.json")?;
27//! restore_session(&page, &snapshot).await?;
28//! // Now the page has the saved cookies + localStorage
29//! # Ok(())
30//! # }
31//! ```
32
33use std::{
34    collections::HashMap,
35    path::Path,
36    time::{Duration, SystemTime, UNIX_EPOCH},
37};
38
39use serde::{Deserialize, Serialize};
40use tracing::{debug, warn};
41
42use crate::{
43    error::{BrowserError, Result},
44    page::PageHandle,
45};
46
47// ─── Cookie ──────────────────────────────────────────────────────────────────
48
49/// A serialisable browser cookie.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct SessionCookie {
52    /// Cookie name.
53    pub name: String,
54    /// Cookie value.
55    pub value: String,
56    /// Domain (e.g. `.example.com`).
57    pub domain: String,
58    /// URL path (e.g. `/`).
59    pub path: String,
60    /// Expiry as Unix timestamp seconds (`-1` = session cookie).
61    pub expires: f64,
62    /// HTTP-only flag.
63    pub http_only: bool,
64    /// Secure flag.
65    pub secure: bool,
66    /// `SameSite` attribute (`"Strict"`, `"Lax"`, `"None"`, or empty).
67    pub same_site: String,
68}
69
70// ─── Snapshot ────────────────────────────────────────────────────────────────
71
72/// A point-in-time snapshot of a browser session.
73///
74/// Contains cookies and localStorage entries that are sufficient to resume
75/// most authenticated sessions.
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub struct SessionSnapshot {
78    /// Origin URL the session was captured from (e.g. `"https://example.com"`).
79    pub origin: String,
80    /// Saved cookies.
81    pub cookies: Vec<SessionCookie>,
82    /// localStorage key-value pairs captured from the page.
83    pub local_storage: HashMap<String, String>,
84    /// Unix timestamp (seconds) when this snapshot was captured.
85    pub captured_at: u64,
86    /// Approximate TTL for auto-expiry checks. `None` means never expire.
87    pub ttl_secs: Option<u64>,
88}
89
90impl SessionSnapshot {
91    /// Returns `true` if the snapshot has exceeded its TTL.
92    ///
93    /// Always returns `false` when no TTL is set.
94    #[must_use]
95    pub fn is_expired(&self) -> bool {
96        let Some(ttl) = self.ttl_secs else {
97            return false;
98        };
99        let now = SystemTime::now()
100            .duration_since(UNIX_EPOCH)
101            .unwrap_or(Duration::ZERO)
102            .as_secs();
103        now.saturating_sub(self.captured_at) > ttl
104    }
105
106    /// Age of the snapshot.
107    #[must_use]
108    pub fn age(&self) -> Duration {
109        let now = SystemTime::now()
110            .duration_since(UNIX_EPOCH)
111            .unwrap_or(Duration::ZERO)
112            .as_secs();
113        Duration::from_secs(now.saturating_sub(self.captured_at))
114    }
115
116    /// Serialise to a JSON file.
117    ///
118    /// # Errors
119    ///
120    /// Returns an IO or serialisation error if the file cannot be written.
121    pub fn save_to_file(&self, path: impl AsRef<Path>) -> Result<()> {
122        let json = serde_json::to_string_pretty(self)
123            .map_err(|e| BrowserError::ConfigError(format!("Failed to serialise session: {e}")))?;
124        std::fs::write(path, json).map_err(BrowserError::Io)
125    }
126
127    /// Deserialise from a JSON file previously written by [`Self::save_to_file`].
128    ///
129    /// # Errors
130    ///
131    /// Returns an IO or deserialisation error if the file cannot be read.
132    pub fn load_from_file(path: impl AsRef<Path>) -> Result<Self> {
133        let json = std::fs::read_to_string(path).map_err(BrowserError::Io)?;
134        serde_json::from_str(&json)
135            .map_err(|e| BrowserError::ConfigError(format!("Failed to deserialise session: {e}")))
136    }
137}
138
139// ─── Save ─────────────────────────────────────────────────────────────────────
140
141/// Capture the current session state from `page`.
142///
143/// Saves all cookies visible to the page's origin and the full `localStorage`
144/// contents.
145///
146/// # Errors
147///
148/// Returns a CDP error if the cookie fetch or localStorage eval fails.
149pub async fn save_session(page: &PageHandle) -> Result<SessionSnapshot> {
150    let cdp_cookies = page.save_cookies().await?;
151
152    let cookies: Vec<SessionCookie> = cdp_cookies
153        .iter()
154        .map(|c| SessionCookie {
155            name: c.name.clone(),
156            value: c.value.clone(),
157            domain: c.domain.clone(),
158            path: c.path.clone(),
159            expires: c.expires,
160            http_only: c.http_only,
161            secure: c.secure,
162            same_site: c
163                .same_site
164                .as_ref()
165                .map(|s| format!("{s:?}"))
166                .unwrap_or_default(),
167        })
168        .collect();
169
170    // Capture localStorage via JS
171    let local_storage: HashMap<String, String> = capture_local_storage(page).await?;
172
173    // Best-effort origin from current URL
174    let origin = page
175        .eval::<String>("window.location.origin")
176        .await
177        .unwrap_or_default();
178
179    let captured_at = SystemTime::now()
180        .duration_since(UNIX_EPOCH)
181        .unwrap_or(Duration::ZERO)
182        .as_secs();
183
184    debug!(
185        origin = %origin,
186        cookie_count = cookies.len(),
187        ls_keys = local_storage.len(),
188        "Session snapshot captured"
189    );
190
191    Ok(SessionSnapshot {
192        origin,
193        cookies,
194        local_storage,
195        captured_at,
196        ttl_secs: None,
197    })
198}
199
200// ─── Restore ──────────────────────────────────────────────────────────────────
201
202/// Restore a previously saved session into `page`.
203///
204/// Imports all cookies via `Network.setCookie` and injects the localStorage
205/// entries via JavaScript.
206///
207/// # Errors
208///
209/// Returns a CDP error if cookie injection or the localStorage script fails.
210pub async fn restore_session(page: &PageHandle, snapshot: &SessionSnapshot) -> Result<()> {
211    use chromiumoxide::cdp::browser_protocol::network::SetCookieParams;
212
213    if snapshot.is_expired() {
214        warn!(
215            age_secs = snapshot.age().as_secs(),
216            "Restoring an expired session snapshot"
217        );
218    }
219
220    // Inject cookies
221    for cookie in &snapshot.cookies {
222        let params = match SetCookieParams::builder()
223            .name(cookie.name.clone())
224            .value(cookie.value.clone())
225            .domain(cookie.domain.clone())
226            .path(cookie.path.clone())
227            .http_only(cookie.http_only)
228            .secure(cookie.secure)
229            .build()
230        {
231            Ok(p) => p,
232            Err(e) => {
233                warn!(cookie = %cookie.name, error = %e, "Failed to build cookie params");
234                continue;
235            }
236        };
237
238        if let Err(e) = page.inner().execute(params).await {
239            warn!(
240                cookie = %cookie.name,
241                error = %e,
242                "Failed to restore cookie"
243            );
244        }
245    }
246
247    // Inject localStorage via JS
248    if !snapshot.local_storage.is_empty() {
249        let entries: Vec<String> = snapshot
250            .local_storage
251            .iter()
252            .map(|(k, v)| {
253                let k_esc = k.replace('\'', "\\'");
254                let v_esc = v.replace('\'', "\\'");
255                format!("localStorage.setItem('{k_esc}', '{v_esc}');")
256            })
257            .collect();
258
259        let script = entries.join("\n");
260
261        let _: serde_json::Value = page.eval(&script).await.unwrap_or(serde_json::Value::Null);
262    }
263
264    debug!(
265        origin = %snapshot.origin,
266        cookie_count = snapshot.cookies.len(),
267        ls_keys = snapshot.local_storage.len(),
268        "Session restored"
269    );
270
271    Ok(())
272}
273
274// ─── Helpers ──────────────────────────────────────────────────────────────────
275
276/// Evaluate `localStorage` and return all key-value pairs.
277async fn capture_local_storage(page: &PageHandle) -> Result<HashMap<String, String>> {
278    // JS: iterate localStorage and return {key: value, ...}
279    let script = r"
280        (function() {
281            var out = {};
282            for (var i = 0; i < localStorage.length; i++) {
283                var k = localStorage.key(i);
284                out[k] = localStorage.getItem(k);
285            }
286            return JSON.stringify(out);
287        })()
288    ";
289
290    match page.eval::<String>(script).await {
291        Ok(json_str) => serde_json::from_str(&json_str).map_err(|e| {
292            BrowserError::ConfigError(format!("Failed to parse localStorage JSON: {e}"))
293        }),
294        Err(e) => {
295            warn!("localStorage capture failed (non-HTML page?): {e}");
296            Ok(HashMap::new())
297        }
298    }
299}
300
301// ─── Tests ───────────────────────────────────────────────────────────────────
302
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    /// Current time in whole seconds since the Unix epoch.
    fn unix_now() -> u64 {
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs()
    }

    /// Build an empty snapshot for `https://example.com` with the given
    /// capture time and TTL.
    fn make_snapshot(captured_at: u64, ttl_secs: Option<u64>) -> SessionSnapshot {
        SessionSnapshot {
            origin: "https://example.com".to_string(),
            cookies: vec![],
            local_storage: HashMap::new(),
            captured_at,
            ttl_secs,
        }
    }

    #[test]
    fn snapshot_not_expired_without_ttl() {
        let s = make_snapshot(0, None);
        assert!(!s.is_expired());
    }

    #[test]
    fn snapshot_expired_when_past_ttl() {
        // captured 1000s ago, ttl = 100s → expired
        let s = make_snapshot(unix_now() - 1000, Some(100));
        assert!(s.is_expired());
    }

    #[test]
    fn snapshot_not_expired_within_ttl() {
        let s = make_snapshot(unix_now() - 10, Some(3600));
        assert!(!s.is_expired());
    }

    #[test]
    fn snapshot_age_is_reasonable() {
        let s = make_snapshot(unix_now() - 60, None);
        let age = s.age();
        assert!(
            age >= Duration::from_secs(59),
            "age should be ≥59s, got {age:?}"
        );
        assert!(
            age < Duration::from_secs(65),
            "age should be <65s, got {age:?}"
        );
    }

    #[test]
    fn snapshot_roundtrips_json() -> std::result::Result<(), Box<dyn std::error::Error>> {
        let mut s = make_snapshot(1_700_000_000, Some(7200));
        s.cookies.push(SessionCookie {
            name: "session_id".to_string(),
            value: "abc123".to_string(),
            domain: "example.com".to_string(),
            path: "/".to_string(),
            expires: -1.0,
            http_only: true,
            secure: true,
            same_site: "Lax".to_string(),
        });
        s.local_storage
            .insert("theme".to_string(), "dark".to_string());

        let json = serde_json::to_string(&s)?;
        let decoded: SessionSnapshot = serde_json::from_str(&json)?;

        assert_eq!(decoded.cookies.len(), 1);
        if let Some(c) = decoded.cookies.first() {
            assert_eq!(c.name, "session_id");
        }
        assert_eq!(
            decoded.local_storage.get("theme").map(String::as_str),
            Some("dark")
        );
        assert_eq!(decoded.ttl_secs, Some(7200));
        Ok(())
    }

    #[test]
    fn snapshot_file_roundtrip() -> std::result::Result<(), Box<dyn std::error::Error>> {
        let s = make_snapshot(0, Some(3600));
        // Put the process id in the file name so concurrent test runs that
        // share the temp directory do not overwrite each other's file.
        let file_name = format!("stygian_session_test_{}.json", std::process::id());
        let path = std::env::temp_dir().join(file_name);
        s.save_to_file(&path)?;
        let loaded = SessionSnapshot::load_from_file(&path);
        // Remove the file before propagating a load error or asserting, so a
        // failure does not leave it behind.
        let _ = std::fs::remove_file(&path);
        let loaded = loaded?;
        assert_eq!(loaded.origin, s.origin);
        assert_eq!(loaded.captured_at, s.captured_at);
        assert_eq!(loaded.ttl_secs, s.ttl_secs);
        assert!(loaded.cookies.is_empty());
        assert!(loaded.local_storage.is_empty());
        Ok(())
    }
}