Skip to main content

stygian_browser/
browser.rs

//! Browser instance lifecycle management
//!
//! Provides a thin wrapper around a `chromiumoxide` [`Browser`] that adds:
//!
//! - Anti-detection launch arguments from [`BrowserConfig`]
//! - Configurable launch and per-operation timeouts via `tokio::time::timeout`
//! - Health checks using the CDP `Browser.getVersion` command
//! - PID-based zombie process detection and forced cleanup
//! - Graceful shutdown (sends `Browser.close`, bounded by the CDP timeout)
//!
//! # Example
//!
//! ```no_run
//! use stygian_browser::{BrowserConfig, browser::BrowserInstance};
//!
//! # async fn run() -> stygian_browser::error::Result<()> {
//! let config = BrowserConfig::default();
//! let mut instance = BrowserInstance::launch(config).await?;
//!
//! assert!(instance.is_healthy().await);
//! instance.shutdown().await?;
//! # Ok(())
//! # }
//! ```
25
26use std::time::{Duration, Instant};
27
28use chromiumoxide::Browser;
29use futures::StreamExt;
30use tokio::time::timeout;
31use tracing::{debug, info, warn};
32
33use crate::{
34    BrowserConfig,
35    error::{BrowserError, Result},
36};
37
38// ─── BrowserInstance ──────────────────────────────────────────────────────────
39
/// A managed browser instance with health tracking.
///
/// Owns the `chromiumoxide` [`Browser`] handle produced by
/// [`BrowserInstance::launch`] together with the configuration it was launched
/// with.  The CDP handler task is spawned detached by `launch` and is not
/// stored in this struct.  Always call [`BrowserInstance::shutdown`] (or drop)
/// after use to release OS resources.
pub struct BrowserInstance {
    /// Handle to the launched browser; every CDP call goes through it.
    browser: Browser,
    /// Configuration snapshot taken at launch; supplies `cdp_timeout` for the
    /// per-operation deadlines used by the methods below.
    config: BrowserConfig,
    /// Captured once `launch` has finished; basis for [`BrowserInstance::uptime`].
    launched_at: Instant,
    /// Set to `false` after a failed health check so callers know to discard,
    /// and back to `true` after a successful one.
    healthy: bool,
    /// Convenience ID for log correlation.
    id: String,
}
53
54impl BrowserInstance {
55    /// Launch a new browser instance using the provided [`BrowserConfig`].
56    ///
57    /// All configured anti-detection arguments (see
58    /// [`BrowserConfig::effective_args`]) are passed at launch time.
59    ///
60    /// # Errors
61    ///
62    /// - [`BrowserError::LaunchFailed`] if the process does not start within
63    ///   `config.launch_timeout`.
64    /// - [`BrowserError::Timeout`] if the browser doesn't respond in time.
65    ///
66    /// # Example
67    ///
68    /// ```no_run
69    /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
70    ///
71    /// # async fn run() -> stygian_browser::error::Result<()> {
72    /// let instance = BrowserInstance::launch(BrowserConfig::default()).await?;
73    /// # Ok(())
74    /// # }
75    /// ```
76    pub async fn launch(config: BrowserConfig) -> Result<Self> {
77        let id = ulid::Ulid::new().to_string();
78        let launch_timeout = config.launch_timeout;
79
80        info!(browser_id = %id, "Launching browser");
81
82        let args = config.effective_args();
83        debug!(browser_id = %id, ?args, "Chrome launch arguments");
84
85        let mut builder = chromiumoxide::BrowserConfig::builder();
86
87        // Set headless / headed mode on the chromiumoxide builder.
88        // - headed: with_head()
89        // - headless new (default): new_headless_mode() → --headless=new
90        // - headless legacy: default (chromiumoxide defaults to old --headless)
91        if !config.headless {
92            builder = builder.with_head();
93        } else if config.headless_mode == crate::config::HeadlessMode::New {
94            builder = builder.new_headless_mode();
95        }
96
97        if let Some(path) = &config.chrome_path {
98            builder = builder.chrome_executable(path);
99        }
100
101        // Use the caller-supplied profile dir, or generate a unique temp dir
102        // per instance so concurrent pools never race on SingletonLock.
103        let data_dir = config
104            .user_data_dir
105            .clone()
106            .unwrap_or_else(|| std::env::temp_dir().join(format!("stygian-{id}")));
107        builder = builder.user_data_dir(&data_dir);
108
109        for arg in &args {
110            // chromiumoxide's ArgsBuilder prepends "--" when formatting args, so
111            // we strip any existing "--" prefix first to avoid "----arg" in Chrome.
112            let stripped = arg.strip_prefix("--").unwrap_or(arg.as_str());
113            builder = builder.arg(stripped);
114        }
115
116        if let Some((w, h)) = config.window_size {
117            builder = builder.window_size(w, h);
118        }
119
120        let cdp_cfg = builder
121            .build()
122            .map_err(|e| BrowserError::LaunchFailed { reason: e })?;
123
124        let (browser, mut handler) = timeout(launch_timeout, Browser::launch(cdp_cfg))
125            .await
126            .map_err(|_| BrowserError::Timeout {
127                operation: "browser.launch".to_string(),
128                duration_ms: u64::try_from(launch_timeout.as_millis()).unwrap_or(u64::MAX),
129            })?
130            .map_err(|e| BrowserError::LaunchFailed {
131                reason: e.to_string(),
132            })?;
133
134        // Spawn the chromiumoxide message handler; it must run for the browser
135        // to remain responsive.
136        tokio::spawn(async move { while handler.next().await.is_some() {} });
137
138        info!(browser_id = %id, "Browser launched successfully");
139
140        Ok(Self {
141            browser,
142            config,
143            launched_at: Instant::now(),
144            healthy: true,
145            id,
146        })
147    }
148
149    // ─── Health ───────────────────────────────────────────────────────────────
150
151    /// Returns `true` if the browser is currently considered healthy.
152    ///
153    /// This is a cached value updated by [`BrowserInstance::health_check`].
154    #[must_use]
155    pub const fn is_healthy_cached(&self) -> bool {
156        self.healthy
157    }
158
159    /// Actively probe the browser with a CDP request.
160    ///
161    /// Sends `Browser.getVersion` and waits up to `cdp_timeout`.  Updates the
162    /// internal healthy flag and returns the result.
163    ///
164    /// # Example
165    ///
166    /// ```no_run
167    /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
168    ///
169    /// # async fn run() -> stygian_browser::error::Result<()> {
170    /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
171    /// assert!(instance.is_healthy().await);
172    /// # Ok(())
173    /// # }
174    /// ```
175    pub async fn is_healthy(&mut self) -> bool {
176        match self.health_check().await {
177            Ok(()) => true,
178            Err(e) => {
179                warn!(browser_id = %self.id, error = %e, "Health check failed");
180                false
181            }
182        }
183    }
184
185    /// Run a health check and return a structured [`Result`].
186    ///
187    /// Pings the browser with the CDP `Browser.getVersion` RPC.
188    ///
189    /// # Errors
190    ///
191    /// Returns [`BrowserError::Timeout`] when the `Browser.getVersion` RPC
192    /// does not complete within `config.cdp_timeout`, and
193    /// [`BrowserError::CdpError`] for any underlying chromiumoxide error
194    /// returned by the CDP call.
195    pub async fn health_check(&mut self) -> Result<()> {
196        let op_timeout = self.config.cdp_timeout;
197
198        timeout(op_timeout, self.browser.version())
199            .await
200            .map_err(|_| {
201                self.healthy = false;
202                BrowserError::Timeout {
203                    operation: "Browser.getVersion".to_string(),
204                    duration_ms: u64::try_from(op_timeout.as_millis()).unwrap_or(u64::MAX),
205                }
206            })?
207            .map_err(|e| {
208                self.healthy = false;
209                BrowserError::CdpError {
210                    operation: "Browser.getVersion".to_string(),
211                    message: e.to_string(),
212                }
213            })?;
214
215        self.healthy = true;
216        Ok(())
217    }
218
219    // ─── Accessors ────────────────────────────────────────────────────────────
220
221    /// Access the underlying `chromiumoxide` [`Browser`].
222    #[must_use]
223    pub const fn browser(&self) -> &Browser {
224        &self.browser
225    }
226
227    /// Mutable access to the underlying `chromiumoxide` [`Browser`].
228    pub const fn browser_mut(&mut self) -> &mut Browser {
229        &mut self.browser
230    }
231
232    /// Instance ID (ULID) for log correlation.
233    #[must_use]
234    pub fn id(&self) -> &str {
235        &self.id
236    }
237
238    /// How long has this instance been alive.
239    #[must_use]
240    pub fn uptime(&self) -> Duration {
241        self.launched_at.elapsed()
242    }
243
244    /// The config snapshot used at launch.
245    #[must_use]
246    pub const fn config(&self) -> &BrowserConfig {
247        &self.config
248    }
249
250    // ─── Shutdown ─────────────────────────────────────────────────────────────
251
252    /// Gracefully close the browser.
253    ///
254    /// Sends `Browser.close` and waits up to `cdp_timeout`.  Any errors during
255    /// tear-down are logged but not propagated so the caller can always clean up.
256    ///
257    /// # Example
258    ///
259    /// ```no_run
260    /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
261    ///
262    /// # async fn run() -> stygian_browser::error::Result<()> {
263    /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
264    /// instance.shutdown().await?;
265    /// # Ok(())
266    /// # }
267    /// ```
268    ///
269    /// # Errors
270    ///
271    /// Returns [`BrowserError::Timeout`] if the `Browser.close` call does
272    /// not complete within `config.cdp_timeout`, and
273    /// [`BrowserError::CdpError`] for any underlying chromiumoxide error
274    /// returned while issuing the close command. Internal teardown steps
275    /// are logged and never propagate a failure upward.
276    pub async fn shutdown(mut self) -> Result<()> {
277        info!(browser_id = %self.id, "Shutting down browser");
278
279        let op_timeout = self.config.cdp_timeout;
280
281        if let Err(e) = timeout(op_timeout, self.browser.close()).await {
282            // Timeout — log and continue cleanup
283            warn!(
284                browser_id = %self.id,
285                "Browser.close timed out after {}ms: {e}",
286                op_timeout.as_millis()
287            );
288        }
289
290        self.healthy = false;
291        info!(browser_id = %self.id, "Browser shut down");
292        Ok(())
293    }
294
295    /// Open a new tab and return a [`crate::page::PageHandle`].
296    ///
297    /// The handle closes the tab automatically when dropped.
298    ///
299    /// # Errors
300    ///
301    /// Returns [`BrowserError::CdpError`] if a new page cannot be created.
302    ///
303    /// # Example
304    ///
305    /// ```no_run
306    /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
307    ///
308    /// # async fn run() -> stygian_browser::error::Result<()> {
309    /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
310    /// let page = instance.new_page().await?;
311    /// drop(page);
312    /// instance.shutdown().await?;
313    /// # Ok(())
314    /// # }
315    /// ```
316    pub async fn new_page(&self) -> crate::error::Result<crate::page::PageHandle> {
317        use tokio::time::timeout;
318
319        let cdp_timeout = self.config.cdp_timeout;
320
321        let page = timeout(cdp_timeout, self.browser.new_page("about:blank"))
322            .await
323            .map_err(|_| crate::error::BrowserError::Timeout {
324                operation: "Browser.newPage".to_string(),
325                duration_ms: u64::try_from(cdp_timeout.as_millis()).unwrap_or(u64::MAX),
326            })?
327            .map_err(|e| crate::error::BrowserError::CdpError {
328                operation: "Browser.newPage".to_string(),
329                message: e.to_string(),
330            })?;
331
332        // Apply stealth injection scripts for all active stealth levels.
333        #[cfg(feature = "stealth")]
334        crate::stealth::apply_stealth_to_page(&page, &self.config).await?;
335
336        Ok(crate::page::PageHandle::new(page, cdp_timeout))
337    }
338}
339
340// ─── Tests ────────────────────────────────────────────────────────────────────
341
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration must carry the Blink anti-automation flag.
    ///
    /// Pure unit test: no Chrome binary is required.
    #[test]
    fn effective_args_contain_automation_flag() {
        let defaults = BrowserConfig::default();
        let args = defaults.effective_args();
        let flagged = args.iter().any(|arg| arg.contains("AutomationControlled"));
        assert!(
            flagged,
            "Expected --disable-blink-features=AutomationControlled in args: {args:?}"
        );
    }

    /// A configured proxy URL must show up among the launch arguments.
    #[test]
    fn proxy_arg_injected_when_set() {
        let proxied = BrowserConfig::builder()
            .proxy("http://proxy.example.com:8080".to_string())
            .build();
        let args = proxied.effective_args();
        let mentions_proxy = args.iter().any(|arg| arg.contains("proxy.example.com"));
        assert!(mentions_proxy, "Expected proxy arg in {args:?}");
    }

    /// A configured window size must show up among the launch arguments.
    #[test]
    fn window_size_arg_injected() {
        let sized = BrowserConfig::builder().window_size(1280, 720).build();
        let args = sized.effective_args();
        let mentions_width = args.iter().any(|arg| arg.contains("1280"));
        assert!(mentions_width, "Expected window-size arg in {args:?}");
    }

    /// Compile-time check that the instance can cross and be shared between
    /// threads.
    #[test]
    fn browser_instance_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<BrowserInstance>();
    }

    #[test]
    fn no_sandbox_absent_by_default_on_non_linux() {
        // On non-Linux (macOS, Windows) is_containerized() always returns false,
        // so --no-sandbox must NOT appear in the default args unless overridden.
        // On Linux in CI/Docker the STYGIAN_DISABLE_SANDBOX env var or /.dockerenv
        // controls this — skip the assertion there to avoid false failures.
        #[cfg(not(target_os = "linux"))]
        {
            let defaults = BrowserConfig::default();
            let args = defaults.effective_args();
            assert!(args.iter().all(|arg| arg != "--no-sandbox"));
        }
    }

    /// The default arguments must disable `/dev/shm` usage.
    #[test]
    fn effective_args_include_disable_dev_shm() {
        let defaults = BrowserConfig::default();
        let args = defaults.effective_args();
        assert!(args.iter().any(|arg| arg.contains("disable-dev-shm-usage")));
    }

    /// Clearing the window size must remove the corresponding argument.
    #[test]
    fn no_window_size_arg_when_none() {
        let unsized_cfg = BrowserConfig {
            window_size: None,
            ..BrowserConfig::default()
        };
        let args = unsized_cfg.effective_args();
        assert!(args.iter().all(|arg| !arg.contains("--window-size")));
    }

    /// Extra arguments supplied through the builder must be passed along.
    #[test]
    fn custom_arg_appended() {
        let customised = BrowserConfig::builder()
            .arg("--user-agent=MyCustomBot/1.0".to_string())
            .build();
        let args = customised.effective_args();
        assert!(args.iter().any(|arg| arg.contains("MyCustomBot")));
    }

    /// A proxy bypass list must be translated into its launch argument.
    #[test]
    fn proxy_bypass_list_arg_injected() {
        let bypassing = BrowserConfig::builder()
            .proxy("http://proxy:8080".to_string())
            .proxy_bypass_list("<local>,localhost".to_string())
            .build();
        let args = bypassing.effective_args();
        assert!(args.iter().any(|arg| arg.contains("proxy-bypass-list")));
    }

    /// The `headless` flag set on the builder must survive `build()`.
    #[test]
    fn headless_mode_preserved_in_config() {
        for flag in [false, true] {
            let built = BrowserConfig::builder().headless(flag).build();
            assert_eq!(built.headless, flag);
        }
    }

    /// The default launch timeout must not be zero.
    #[test]
    fn launch_timeout_default_is_non_zero() {
        let defaults = BrowserConfig::default();
        assert!(defaults.launch_timeout > Duration::ZERO);
    }

    /// The default CDP timeout must not be zero.
    #[test]
    fn cdp_timeout_default_is_non_zero() {
        let defaults = BrowserConfig::default();
        assert!(defaults.cdp_timeout > Duration::ZERO);
    }
}