stygian_browser/browser.rs
1//! Browser instance lifecycle management
2//!
3//! Provides a thin wrapper around a `chromiumoxide` [`Browser`] that adds:
4//!
5//! - Anti-detection launch arguments from [`BrowserConfig`]
6//! - Configurable launch and per-operation timeouts via `tokio::time::timeout`
7//! - Health checks using the CDP `Browser.getVersion` command
8//! - PID-based zombie process detection and forced cleanup
//! - Graceful shutdown (sends `Browser.close`, bounded by the configured CDP timeout)
10//!
11//! # Example
12//!
13//! ```no_run
14//! use stygian_browser::{BrowserConfig, browser::BrowserInstance};
15//!
16//! # async fn run() -> stygian_browser::error::Result<()> {
17//! let config = BrowserConfig::default();
18//! let mut instance = BrowserInstance::launch(config).await?;
19//!
20//! assert!(instance.is_healthy().await);
21//! instance.shutdown().await?;
22//! # Ok(())
23//! # }
24//! ```
25
26use std::time::{Duration, Instant};
27
28use chromiumoxide::Browser;
29use futures::StreamExt;
30use tokio::time::timeout;
31use tracing::{debug, info, warn};
32
33use crate::{
34 BrowserConfig,
35 error::{BrowserError, Result},
36};
37
38// ─── BrowserInstance ──────────────────────────────────────────────────────────
39
/// A managed browser instance with health tracking.
///
/// Owns a `chromiumoxide` [`Browser`] together with the [`BrowserConfig`]
/// snapshot it was launched with. The CDP message handler is not stored in
/// this struct: [`BrowserInstance::launch`] spawns it as a detached `tokio`
/// task. Always call [`BrowserInstance::shutdown`] (or drop) after use to
/// release OS resources.
pub struct BrowserInstance {
    /// Underlying `chromiumoxide` browser handle used for every CDP call.
    browser: Browser,
    /// Configuration snapshot captured at launch; supplies `cdp_timeout`.
    config: BrowserConfig,
    /// Moment the instance was constructed; basis for [`BrowserInstance::uptime`].
    launched_at: Instant,
    /// Set to `false` after a failed health check so callers know to discard.
    healthy: bool,
    /// Convenience ID for log correlation.
    id: String,
}
53
54impl BrowserInstance {
55 /// Launch a new browser instance using the provided [`BrowserConfig`].
56 ///
57 /// All configured anti-detection arguments (see
58 /// [`BrowserConfig::effective_args`]) are passed at launch time.
59 ///
60 /// # Errors
61 ///
62 /// - [`BrowserError::LaunchFailed`] if the process does not start within
63 /// `config.launch_timeout`.
64 /// - [`BrowserError::Timeout`] if the browser doesn't respond in time.
65 ///
66 /// # Example
67 ///
68 /// ```no_run
69 /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
70 ///
71 /// # async fn run() -> stygian_browser::error::Result<()> {
72 /// let instance = BrowserInstance::launch(BrowserConfig::default()).await?;
73 /// # Ok(())
74 /// # }
75 /// ```
76 pub async fn launch(config: BrowserConfig) -> Result<Self> {
77 let id = ulid::Ulid::new().to_string();
78 let launch_timeout = config.launch_timeout;
79
80 info!(browser_id = %id, "Launching browser");
81
82 let args = config.effective_args();
83 debug!(browser_id = %id, ?args, "Chrome launch arguments");
84
85 let mut builder = chromiumoxide::BrowserConfig::builder();
86
87 // Set headless / headed mode on the chromiumoxide builder.
88 // - headed: with_head()
89 // - headless new (default): new_headless_mode() → --headless=new
90 // - headless legacy: default (chromiumoxide defaults to old --headless)
91 if !config.headless {
92 builder = builder.with_head();
93 } else if config.headless_mode == crate::config::HeadlessMode::New {
94 builder = builder.new_headless_mode();
95 }
96
97 if let Some(path) = &config.chrome_path {
98 builder = builder.chrome_executable(path);
99 }
100
101 // Use the caller-supplied profile dir, or generate a unique temp dir
102 // per instance so concurrent pools never race on SingletonLock.
103 let data_dir = config
104 .user_data_dir
105 .clone()
106 .unwrap_or_else(|| std::env::temp_dir().join(format!("stygian-{id}")));
107 builder = builder.user_data_dir(&data_dir);
108
109 for arg in &args {
110 // chromiumoxide's ArgsBuilder prepends "--" when formatting args, so
111 // we strip any existing "--" prefix first to avoid "----arg" in Chrome.
112 let stripped = arg.strip_prefix("--").unwrap_or(arg.as_str());
113 builder = builder.arg(stripped);
114 }
115
116 if let Some((w, h)) = config.window_size {
117 builder = builder.window_size(w, h);
118 }
119
120 let cdp_cfg = builder
121 .build()
122 .map_err(|e| BrowserError::LaunchFailed { reason: e })?;
123
124 let (browser, mut handler) = timeout(launch_timeout, Browser::launch(cdp_cfg))
125 .await
126 .map_err(|_| BrowserError::Timeout {
127 operation: "browser.launch".to_string(),
128 duration_ms: u64::try_from(launch_timeout.as_millis()).unwrap_or(u64::MAX),
129 })?
130 .map_err(|e| BrowserError::LaunchFailed {
131 reason: e.to_string(),
132 })?;
133
134 // Spawn the chromiumoxide message handler; it must run for the browser
135 // to remain responsive.
136 tokio::spawn(async move { while handler.next().await.is_some() {} });
137
138 info!(browser_id = %id, "Browser launched successfully");
139
140 Ok(Self {
141 browser,
142 config,
143 launched_at: Instant::now(),
144 healthy: true,
145 id,
146 })
147 }
148
149 // ─── Health ───────────────────────────────────────────────────────────────
150
151 /// Returns `true` if the browser is currently considered healthy.
152 ///
153 /// This is a cached value updated by [`BrowserInstance::health_check`].
154 #[must_use]
155 pub const fn is_healthy_cached(&self) -> bool {
156 self.healthy
157 }
158
159 /// Actively probe the browser with a CDP request.
160 ///
161 /// Sends `Browser.getVersion` and waits up to `cdp_timeout`. Updates the
162 /// internal healthy flag and returns the result.
163 ///
164 /// # Example
165 ///
166 /// ```no_run
167 /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
168 ///
169 /// # async fn run() -> stygian_browser::error::Result<()> {
170 /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
171 /// assert!(instance.is_healthy().await);
172 /// # Ok(())
173 /// # }
174 /// ```
175 pub async fn is_healthy(&mut self) -> bool {
176 match self.health_check().await {
177 Ok(()) => true,
178 Err(e) => {
179 warn!(browser_id = %self.id, error = %e, "Health check failed");
180 false
181 }
182 }
183 }
184
185 /// Run a health check and return a structured [`Result`].
186 ///
187 /// Pings the browser with the CDP `Browser.getVersion` RPC.
188 ///
189 /// # Errors
190 ///
191 /// Returns [`BrowserError::Timeout`] when the `Browser.getVersion` RPC
192 /// does not complete within `config.cdp_timeout`, and
193 /// [`BrowserError::CdpError`] for any underlying chromiumoxide error
194 /// returned by the CDP call.
195 pub async fn health_check(&mut self) -> Result<()> {
196 let op_timeout = self.config.cdp_timeout;
197
198 timeout(op_timeout, self.browser.version())
199 .await
200 .map_err(|_| {
201 self.healthy = false;
202 BrowserError::Timeout {
203 operation: "Browser.getVersion".to_string(),
204 duration_ms: u64::try_from(op_timeout.as_millis()).unwrap_or(u64::MAX),
205 }
206 })?
207 .map_err(|e| {
208 self.healthy = false;
209 BrowserError::CdpError {
210 operation: "Browser.getVersion".to_string(),
211 message: e.to_string(),
212 }
213 })?;
214
215 self.healthy = true;
216 Ok(())
217 }
218
219 // ─── Accessors ────────────────────────────────────────────────────────────
220
221 /// Access the underlying `chromiumoxide` [`Browser`].
222 #[must_use]
223 pub const fn browser(&self) -> &Browser {
224 &self.browser
225 }
226
227 /// Mutable access to the underlying `chromiumoxide` [`Browser`].
228 pub const fn browser_mut(&mut self) -> &mut Browser {
229 &mut self.browser
230 }
231
232 /// Instance ID (ULID) for log correlation.
233 #[must_use]
234 pub fn id(&self) -> &str {
235 &self.id
236 }
237
238 /// How long has this instance been alive.
239 #[must_use]
240 pub fn uptime(&self) -> Duration {
241 self.launched_at.elapsed()
242 }
243
244 /// The config snapshot used at launch.
245 #[must_use]
246 pub const fn config(&self) -> &BrowserConfig {
247 &self.config
248 }
249
250 // ─── Shutdown ─────────────────────────────────────────────────────────────
251
252 /// Gracefully close the browser.
253 ///
254 /// Sends `Browser.close` and waits up to `cdp_timeout`. Any errors during
255 /// tear-down are logged but not propagated so the caller can always clean up.
256 ///
257 /// # Example
258 ///
259 /// ```no_run
260 /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
261 ///
262 /// # async fn run() -> stygian_browser::error::Result<()> {
263 /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
264 /// instance.shutdown().await?;
265 /// # Ok(())
266 /// # }
267 /// ```
268 ///
269 /// # Errors
270 ///
271 /// Returns [`BrowserError::Timeout`] if the `Browser.close` call does
272 /// not complete within `config.cdp_timeout`, and
273 /// [`BrowserError::CdpError`] for any underlying chromiumoxide error
274 /// returned while issuing the close command. Internal teardown steps
275 /// are logged and never propagate a failure upward.
276 pub async fn shutdown(mut self) -> Result<()> {
277 info!(browser_id = %self.id, "Shutting down browser");
278
279 let op_timeout = self.config.cdp_timeout;
280
281 if let Err(e) = timeout(op_timeout, self.browser.close()).await {
282 // Timeout — log and continue cleanup
283 warn!(
284 browser_id = %self.id,
285 "Browser.close timed out after {}ms: {e}",
286 op_timeout.as_millis()
287 );
288 }
289
290 self.healthy = false;
291 info!(browser_id = %self.id, "Browser shut down");
292 Ok(())
293 }
294
295 /// Open a new tab and return a [`crate::page::PageHandle`].
296 ///
297 /// The handle closes the tab automatically when dropped.
298 ///
299 /// # Errors
300 ///
301 /// Returns [`BrowserError::CdpError`] if a new page cannot be created.
302 ///
303 /// # Example
304 ///
305 /// ```no_run
306 /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
307 ///
308 /// # async fn run() -> stygian_browser::error::Result<()> {
309 /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
310 /// let page = instance.new_page().await?;
311 /// drop(page);
312 /// instance.shutdown().await?;
313 /// # Ok(())
314 /// # }
315 /// ```
316 pub async fn new_page(&self) -> crate::error::Result<crate::page::PageHandle> {
317 use tokio::time::timeout;
318
319 let cdp_timeout = self.config.cdp_timeout;
320
321 let page = timeout(cdp_timeout, self.browser.new_page("about:blank"))
322 .await
323 .map_err(|_| crate::error::BrowserError::Timeout {
324 operation: "Browser.newPage".to_string(),
325 duration_ms: u64::try_from(cdp_timeout.as_millis()).unwrap_or(u64::MAX),
326 })?
327 .map_err(|e| crate::error::BrowserError::CdpError {
328 operation: "Browser.newPage".to_string(),
329 message: e.to_string(),
330 })?;
331
332 // Apply stealth injection scripts for all active stealth levels.
333 #[cfg(feature = "stealth")]
334 crate::stealth::apply_stealth_to_page(&page, &self.config).await?;
335
336 Ok(crate::page::PageHandle::new(page, cdp_timeout))
337 }
338}
339
340// ─── Tests ────────────────────────────────────────────────────────────────────
341
#[cfg(test)]
mod tests {
    use super::*;

    /// The default argument set must carry the Blink anti-automation switch.
    ///
    /// Pure configuration check; no Chrome binary is required.
    #[test]
    fn effective_args_contain_automation_flag() {
        let cfg = BrowserConfig::default();
        let args = cfg.effective_args();
        let has_flag = args.iter().any(|a| a.contains("AutomationControlled"));
        assert!(
            has_flag,
            "Expected --disable-blink-features=AutomationControlled in args: {args:?}"
        );
    }

    /// A configured proxy URL must show up in the launch arguments.
    #[test]
    fn proxy_arg_injected_when_set() {
        let cfg = BrowserConfig::builder()
            .proxy("http://proxy.example.com:8080".to_string())
            .build();
        let args = cfg.effective_args();
        let has_proxy = args.iter().any(|a| a.contains("proxy.example.com"));
        assert!(has_proxy, "Expected proxy arg in {args:?}");
    }

    /// A configured window size must show up in the launch arguments.
    #[test]
    fn window_size_arg_injected() {
        let cfg = BrowserConfig::builder().window_size(1280, 720).build();
        let args = cfg.effective_args();
        let has_width = args.iter().any(|a| a.contains("1280"));
        assert!(has_width, "Expected window-size arg in {args:?}");
    }

    /// `BrowserInstance` must be usable across threads.
    #[test]
    fn browser_instance_is_send_sync() {
        fn assert_send_and_sync<T: Send + Sync>() {}
        assert_send_and_sync::<BrowserInstance>();
    }

    /// `--no-sandbox` must not be part of the defaults outside Linux.
    #[test]
    fn no_sandbox_absent_by_default_on_non_linux() {
        // Off Linux (macOS, Windows) is_containerized() always returns false,
        // so --no-sandbox must NOT be in the default args unless overridden.
        // On Linux in CI/Docker the STYGIAN_DISABLE_SANDBOX env var or
        // /.dockerenv decides, so the assertion is compiled out there to avoid
        // false failures.
        #[cfg(not(target_os = "linux"))]
        {
            let defaults = BrowserConfig::default();
            let args = defaults.effective_args();
            assert!(args.iter().all(|a| a != "--no-sandbox"));
        }
    }

    /// The defaults must disable `/dev/shm` usage.
    #[test]
    fn effective_args_include_disable_dev_shm() {
        let defaults = BrowserConfig::default();
        let args = defaults.effective_args();
        let found = args.iter().any(|a| a.contains("disable-dev-shm-usage"));
        assert!(found);
    }

    /// Without a window size, no `--window-size` switch may be emitted.
    #[test]
    fn no_window_size_arg_when_none() {
        let unsized_cfg = BrowserConfig {
            window_size: None,
            ..BrowserConfig::default()
        };
        let args = unsized_cfg.effective_args();
        assert!(args.iter().all(|a| !a.contains("--window-size")));
    }

    /// Caller-supplied arguments must be carried through to the final list.
    #[test]
    fn custom_arg_appended() {
        let custom = BrowserConfig::builder()
            .arg("--user-agent=MyCustomBot/1.0".to_string())
            .build();
        let args = custom.effective_args();
        let found = args.iter().any(|a| a.contains("MyCustomBot"));
        assert!(found);
    }

    /// A proxy bypass list must be translated into its launch switch.
    #[test]
    fn proxy_bypass_list_arg_injected() {
        let proxied = BrowserConfig::builder()
            .proxy("http://proxy:8080".to_string())
            .proxy_bypass_list("<local>,localhost".to_string())
            .build();
        let args = proxied.effective_args();
        let found = args.iter().any(|a| a.contains("proxy-bypass-list"));
        assert!(found);
    }

    /// The builder must store the requested headless flag unchanged.
    #[test]
    fn headless_mode_preserved_in_config() {
        let headed = BrowserConfig::builder().headless(false).build();
        assert!(!headed.headless);
        let headless = BrowserConfig::builder().headless(true).build();
        assert!(headless.headless);
    }

    /// The default launch timeout must be a positive duration.
    #[test]
    fn launch_timeout_default_is_non_zero() {
        let defaults = BrowserConfig::default();
        assert!(defaults.launch_timeout > Duration::ZERO);
    }

    /// The default CDP timeout must be a positive duration.
    #[test]
    fn cdp_timeout_default_is_non_zero() {
        let defaults = BrowserConfig::default();
        assert!(defaults.cdp_timeout > Duration::ZERO);
    }
}