1use std::fmt;
32use std::time::{Duration, Instant};
33
34use async_trait::async_trait;
35use serde_json::{Value, json};
36
37use crate::domain::error::{Result, ServiceError, StygianError};
38use crate::ports::{ScrapingService, ServiceInput, ServiceOutput};
39
/// Condition the adapter waits for before reading a navigated page.
///
/// [`WaitStrategy::DomContentLoaded`] is the `Default` value.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum WaitStrategy {
    /// Wait for the DOM-content-loaded stage of the page.
    #[default]
    DomContentLoaded,
    /// Wait for the network-idle stage of the page.
    NetworkIdle,
    /// Wait until the contained selector string appears in the page.
    SelectorAppears(String),
    /// Pause for the contained duration once navigation has finished.
    Fixed(Duration),
}
53
54impl WaitStrategy {
55 fn from_params(params: &Value) -> Self {
57 match params.get("wait_strategy").and_then(Value::as_str) {
58 Some("network_idle") => Self::NetworkIdle,
59 Some("dom_content_loaded") => Self::DomContentLoaded,
60 Some(s) if s.starts_with("selector:") => {
61 Self::SelectorAppears(s.trim_start_matches("selector:").to_string())
62 }
63 _ => params
64 .get("wait_ms")
65 .and_then(Value::as_u64)
66 .map_or(Self::DomContentLoaded, |ms| {
67 Self::Fixed(Duration::from_millis(ms))
68 }),
69 }
70 }
71}
72
73impl fmt::Display for WaitStrategy {
74 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75 match self {
76 Self::DomContentLoaded => write!(f, "dom_content_loaded"),
77 Self::NetworkIdle => write!(f, "network_idle"),
78 Self::SelectorAppears(selector) => write!(f, "selector_appears({selector})"),
79 Self::Fixed(duration) => write!(f, "fixed_{}ms", duration.as_millis()),
80 }
81 }
82}
83
/// Stealth level attached to a request or used as an adapter default.
///
/// [`StealthLevel::Basic`] is the `Default` value.
// NOTE(review): what each level actually enables is not visible in this file.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum StealthLevel {
    /// Named `"none"` in request parameters and metadata.
    None,
    /// Named `"basic"`; also the fallback for unrecognised input.
    #[default]
    Basic,
    /// Named `"advanced"` in request parameters and metadata.
    Advanced,
}
95
96impl StealthLevel {
97 fn from_params(params: &Value) -> Self {
98 match params.get("stealth_level").and_then(Value::as_str) {
99 Some("advanced") => Self::Advanced,
100 Some("none") => Self::None,
101 _ => Self::Basic,
102 }
103 }
104
105 pub const fn as_str(&self) -> &'static str {
107 match self {
108 Self::None => "none",
109 Self::Basic => "basic",
110 Self::Advanced => "advanced",
111 }
112 }
113}
114
115#[derive(Debug, Clone)]
117pub struct BrowserAdapterConfig {
118 pub timeout: Duration,
120 pub max_concurrent: usize,
122 pub default_wait: WaitStrategy,
124 pub default_stealth: StealthLevel,
126 pub block_resources: bool,
128 pub headless: bool,
130 pub user_agent: Option<String>,
132 pub viewport_width: u32,
134 pub viewport_height: u32,
136}
137
138impl Default for BrowserAdapterConfig {
139 fn default() -> Self {
140 Self {
141 timeout: Duration::from_secs(30),
142 max_concurrent: 5,
143 default_wait: WaitStrategy::DomContentLoaded,
144 default_stealth: StealthLevel::Basic,
145 block_resources: true,
146 headless: true,
147 user_agent: None,
148 viewport_width: 1920,
149 viewport_height: 1080,
150 }
151 }
152}
153
154#[derive(Clone)]
166pub struct BrowserAdapter {
167 config: BrowserAdapterConfig,
168}
169
170impl BrowserAdapter {
171 pub fn new() -> Self {
183 Self {
184 config: BrowserAdapterConfig::default(),
185 }
186 }
187
188 pub const fn with_config(config: BrowserAdapterConfig) -> Self {
204 Self { config }
205 }
206
207 fn resolve_timeout(&self, params: &Value) -> Duration {
209 params
210 .get("timeout_ms")
211 .and_then(Value::as_u64)
212 .map_or(self.config.timeout, Duration::from_millis)
213 }
214
215 #[allow(clippy::option_if_let_else)]
221 #[cfg(feature = "browser")]
222 async fn navigate_with_browser(
223 &self,
224 url: &str,
225 wait: &WaitStrategy,
226 timeout: Duration,
227 ) -> Result<(String, Value)> {
228 use stygian_browser::page::WaitUntil;
229 use stygian_browser::{BrowserConfig, BrowserPool};
230
231 let start = Instant::now();
232
233 let browser_config = BrowserConfig {
235 headless: self.config.headless,
236 ..BrowserConfig::default()
237 };
238
239 let pool = BrowserPool::new(browser_config)
241 .await
242 .map_err(|e| StygianError::Service(ServiceError::Unavailable(e.to_string())))?;
243
244 let handle = match tokio::time::timeout(timeout, pool.acquire()).await {
246 Ok(Ok(h)) => h,
247 Ok(Err(e)) => {
248 return Err(StygianError::Service(ServiceError::Unavailable(format!(
249 "Browser pool exhausted or unavailable: {e}"
250 ))));
251 }
252 Err(_) => {
253 return Err(StygianError::Service(ServiceError::Unavailable(format!(
254 "Browser acquisition timeout after {timeout:?}"
255 ))));
256 }
257 };
258
259 let Some(instance) = handle.browser() else {
261 return Err(StygianError::Service(ServiceError::Unavailable(
262 "Failed to get browser instance after acquisition".to_string(),
263 )));
264 };
265
266 let mut page = instance
267 .new_page()
268 .await
269 .map_err(|e| StygianError::Service(ServiceError::Unavailable(e.to_string())))?;
270
271 let wait_condition = match wait {
273 WaitStrategy::DomContentLoaded => WaitUntil::DomContentLoaded,
274 WaitStrategy::NetworkIdle => WaitUntil::NetworkIdle,
275 WaitStrategy::SelectorAppears(selector) => WaitUntil::Selector(selector.clone()),
276 WaitStrategy::Fixed(_duration) => WaitUntil::DomContentLoaded, };
278
279 if let Err(e) = page.navigate(url, wait_condition, timeout).await {
281 return Err(StygianError::Service(ServiceError::Unavailable(format!(
282 "Browser navigation failed: {e}"
283 ))));
284 }
285
286 if let WaitStrategy::Fixed(duration) = wait {
288 tokio::time::sleep(*duration).await;
289 }
290
291 let html = page
293 .content()
294 .await
295 .map_err(|e| StygianError::Service(ServiceError::Unavailable(e.to_string())))?;
296
297 let elapsed = start.elapsed();
298
299 Ok((
302 html,
303 json!({
304 "url": url,
305 "navigation_time_ms": elapsed.as_millis(),
306 "wait_strategy": wait.to_string(),
307 "stealth_level": self.config.default_stealth.as_str(),
308 "viewport": {
309 "width": self.config.viewport_width,
310 "height": self.config.viewport_height
311 },
312 "rendered": true,
313 }),
314 ))
315 }
316
317 #[cfg(not(feature = "browser"))]
319 async fn navigate_with_browser(
320 &self,
321 url: &str,
322 _wait: &WaitStrategy,
323 _timeout: Duration,
324 ) -> Result<(String, Value)> {
325 Err(StygianError::Service(ServiceError::Unavailable(format!(
326 "stygian-graph was compiled without the 'browser' feature; \
327 cannot render JavaScript for URL: {url}"
328 ))))
329 }
330}
331
/// `Default` delegates to [`BrowserAdapter::new`].
impl Default for BrowserAdapter {
    fn default() -> Self {
        Self::new()
    }
}
337
338#[async_trait]
339impl ScrapingService for BrowserAdapter {
340 async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput> {
364 let wait = WaitStrategy::from_params(&input.params);
365 let _stealth = StealthLevel::from_params(&input.params);
366 let timeout = self.resolve_timeout(&input.params);
367
368 let (html, metadata) = tokio::time::timeout(
369 timeout + Duration::from_secs(5), self.navigate_with_browser(&input.url, &wait, timeout),
371 )
372 .await
373 .map_err(|_| {
374 StygianError::Service(ServiceError::Timeout(
375 u64::try_from(timeout.as_millis()).unwrap_or(u64::MAX),
376 ))
377 })??;
378
379 Ok(ServiceOutput {
380 data: html,
381 metadata,
382 })
383 }
384
385 fn name(&self) -> &'static str {
386 "browser"
387 }
388}
389
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::redundant_closure_for_method_calls
)]
mod tests {
    use super::*;

    // ---- Pure unit tests (no browser required) -------------------------

    #[test]
    fn test_adapter_default_name() {
        let adapter = BrowserAdapter::new();
        assert_eq!(adapter.name(), "browser");
    }

    #[test]
    fn test_wait_strategy_from_params_dom() {
        let params = json!({ "wait_strategy": "dom_content_loaded" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::DomContentLoaded
        );
    }

    #[test]
    fn test_wait_strategy_from_params_network_idle() {
        let params = json!({ "wait_strategy": "network_idle" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::NetworkIdle
        );
    }

    #[test]
    fn test_wait_strategy_from_params_selector() {
        let params = json!({ "wait_strategy": "selector:#main-content" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::SelectorAppears("#main-content".to_string())
        );
    }

    #[test]
    fn test_wait_strategy_from_params_fixed_ms() {
        let params = json!({ "wait_ms": 500u64 });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::Fixed(Duration::from_millis(500))
        );
    }

    #[test]
    fn test_stealth_level_from_params() {
        assert_eq!(
            StealthLevel::from_params(&json!({ "stealth_level": "advanced" })),
            StealthLevel::Advanced
        );
        assert_eq!(
            StealthLevel::from_params(&json!({ "stealth_level": "none" })),
            StealthLevel::None
        );
        assert_eq!(StealthLevel::from_params(&json!({})), StealthLevel::Basic);
    }

    #[test]
    fn test_resolve_timeout_override() {
        let adapter = BrowserAdapter::new();
        let params = json!({ "timeout_ms": 5000u64 });
        assert_eq!(adapter.resolve_timeout(&params), Duration::from_secs(5));
    }

    #[test]
    fn test_resolve_timeout_default() {
        let adapter = BrowserAdapter::new();
        let params = json!({});
        assert_eq!(adapter.resolve_timeout(&params), Duration::from_secs(30));
    }

    #[test]
    fn test_config_builder() {
        let config = BrowserAdapterConfig {
            timeout: Duration::from_mins(1),
            max_concurrent: 3,
            block_resources: false,
            ..BrowserAdapterConfig::default()
        };
        let adapter = BrowserAdapter::with_config(config);
        assert_eq!(adapter.config.timeout, Duration::from_mins(1));
        assert_eq!(adapter.config.max_concurrent, 3);
    }

    // ---- Integration tests (ignored by default) ------------------------
    //
    // These need a real browser and, mostly, network access. An
    // `Unavailable` error is tolerated so they stay green on machines
    // where the browser cannot be started.

    #[tokio::test]
    #[ignore = "requires real Chrome binary"]
    async fn test_execute_returns_service_output_or_unavailable() {
        let adapter = BrowserAdapter::new();
        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({ "wait_strategy": "dom_content_loaded" }),
        };
        match adapter.execute(input).await {
            Ok(output) => {
                assert!(!output.data.is_empty(), "output data should not be empty");
                assert!(output.metadata.is_object());
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Acceptable: the browser could not be used here.
            }
            Err(e) => panic!("unexpected error: {e}"),
        }
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_navigates_url() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({
                "wait_strategy": "dom_content_loaded",
                "timeout_ms": 30000
            }),
        };

        let result = adapter.execute(input).await;

        match result {
            Ok(output) => {
                assert!(!output.data.is_empty());
                assert!(
                    output
                        .metadata
                        .get("rendered")
                        .and_then(|v| v.as_bool())
                        .unwrap_or(false)
                );
                assert!(output.metadata.get("navigation_time_ms").is_some());
                assert_eq!(
                    output.metadata.get("url").and_then(|v| v.as_str()),
                    Some("https://example.com")
                );
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Acceptable: the browser could not be used here.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }

    #[tokio::test]
    #[ignore = "Requires Chrome installed and network access; may panic if browser unavailable"]
    async fn browser_adapter_respects_timeout() {
        let config = BrowserAdapterConfig {
            timeout: Duration::from_secs(2),
            ..Default::default()
        };
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            // The endpoint delays its response far beyond the 2 s budget.
            url: "https://httpbin.org/delay/10".to_string(),
            params: json!({"timeout_ms": 2000}),
        };

        let result = adapter.execute(input).await;

        match result {
            Err(StygianError::Service(ServiceError::Unavailable(msg))) => {
                assert!(
                    msg.contains("timeout")
                        || msg.contains("unavailable")
                        || msg.contains("Chrome")
                        || msg.contains("exhausted")
                );
            }
            Err(StygianError::Service(ServiceError::Timeout(_))) => {
                // Expected: the outer deadline in `execute` fired.
            }
            Ok(_) => {
                panic!("Expected timeout or unavailable, got success");
            }
            Err(e) => {
                // Any other error still means the request did not succeed.
                eprintln!("Got acceptable error: {e}");
            }
        }
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary"]
    async fn browser_adapter_invalid_url() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "not-a-valid-url".to_string(),
            params: json!({}),
        };

        let result = adapter.execute(input).await;

        assert!(result.is_err());
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_wait_strategy_selector() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({
                "wait_strategy": "selector:body"
            }),
        };

        match adapter.execute(input).await {
            Ok(output) => {
                assert_eq!(
                    output
                        .metadata
                        .get("wait_strategy")
                        .and_then(|v| v.as_str()),
                    Some("selector_appears(body)")
                );
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Acceptable: the browser could not be used here.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_metadata_complete() {
        let config = BrowserAdapterConfig {
            default_stealth: StealthLevel::Advanced,
            user_agent: Some("Mozilla/5.0".to_string()),
            viewport_width: 1440,
            viewport_height: 900,
            ..Default::default()
        };
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({}),
        };

        match adapter.execute(input).await {
            Ok(output) => {
                assert_eq!(
                    output.metadata.get("url").and_then(|v| v.as_str()),
                    Some("https://example.com")
                );
                assert_eq!(
                    output
                        .metadata
                        .get("stealth_level")
                        .and_then(|v| v.as_str()),
                    Some("advanced")
                );
                assert!(output.metadata.get("viewport").is_some());
                assert!(output.metadata.get("navigation_time_ms").is_some());
                let viewport = output.metadata.get("viewport").expect("viewport exists");
                assert_eq!(viewport.get("width").and_then(|v| v.as_u64()), Some(1440));
                assert_eq!(viewport.get("height").and_then(|v| v.as_u64()), Some(900));
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Acceptable: the browser could not be used here.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }
}