1use std::fmt;
32use std::time::{Duration, Instant};
33
34use async_trait::async_trait;
35use serde_json::{Value, json};
36
37use crate::domain::error::{Result, ServiceError, StygianError};
38use crate::ports::{ScrapingService, ServiceInput, ServiceOutput};
39
/// Condition the adapter waits for before reading a page's content.
///
/// The default is [`WaitStrategy::DomContentLoaded`].
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum WaitStrategy {
    /// Wait for the DOM-content-loaded condition (the default variant).
    #[default]
    DomContentLoaded,
    /// Wait for the network-idle condition.
    NetworkIdle,
    /// Wait until the given selector appears; the payload is the selector
    /// string (for example `#main-content`).
    SelectorAppears(String),
    /// Pause for a fixed duration. In this file's browser-backed navigation
    /// the page is first loaded with the DOM-content-loaded condition and the
    /// pause is applied afterwards.
    Fixed(Duration),
}
53
54impl WaitStrategy {
55 fn from_params(params: &Value) -> Self {
57 match params.get("wait_strategy").and_then(Value::as_str) {
58 Some("network_idle") => Self::NetworkIdle,
59 Some("dom_content_loaded") => Self::DomContentLoaded,
60 Some(s) if s.starts_with("selector:") => {
61 Self::SelectorAppears(s.trim_start_matches("selector:").to_string())
62 }
63 _ => params
64 .get("wait_ms")
65 .and_then(Value::as_u64)
66 .map_or(Self::DomContentLoaded, |ms| {
67 Self::Fixed(Duration::from_millis(ms))
68 }),
69 }
70 }
71}
72
73impl fmt::Display for WaitStrategy {
74 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75 match self {
76 Self::DomContentLoaded => write!(f, "dom_content_loaded"),
77 Self::NetworkIdle => write!(f, "network_idle"),
78 Self::SelectorAppears(selector) => write!(f, "selector_appears({selector})"),
79 Self::Fixed(duration) => write!(f, "fixed_{}ms", duration.as_millis()),
80 }
81 }
82}
83
/// Requested stealth level for a browser session.
///
/// The default is [`StealthLevel::Basic`].
// NOTE(review): the code visible in this file only parses this value and
// reports it in metadata; what each level enables is defined elsewhere —
// confirm before documenting concrete effects.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StealthLevel {
    /// Serialised as `"none"`.
    None,
    /// Serialised as `"basic"`; the default variant.
    #[default]
    Basic,
    /// Serialised as `"advanced"`.
    Advanced,
}
95
96impl StealthLevel {
97 fn from_params(params: &Value) -> Self {
98 match params.get("stealth_level").and_then(Value::as_str) {
99 Some("advanced") => Self::Advanced,
100 Some("none") => Self::None,
101 _ => Self::Basic,
102 }
103 }
104
105 pub const fn as_str(&self) -> &'static str {
107 match self {
108 Self::None => "none",
109 Self::Basic => "basic",
110 Self::Advanced => "advanced",
111 }
112 }
113}
114
/// Configuration for [`BrowserAdapter`].
///
/// See the `Default` implementation for the stock values.
#[derive(Debug, Clone)]
pub struct BrowserAdapterConfig {
    /// Timeout used when a request does not supply `timeout_ms`.
    pub timeout: Duration,
    /// Maximum number of concurrent browser sessions.
    // NOTE(review): not read by the code visible in this file — confirm where
    // (or whether) this limit is enforced.
    pub max_concurrent: usize,
    /// Default wait strategy for the adapter.
    pub default_wait: WaitStrategy,
    /// Default stealth level; reported as `stealth_level` in the metadata
    /// returned by browser navigation.
    pub default_stealth: StealthLevel,
    /// Whether resources should be blocked.
    // NOTE(review): not read by the code visible in this file — confirm which
    // resource types are affected and where this is applied.
    pub block_resources: bool,
    /// Forwarded to the browser configuration's `headless` field when the
    /// browser pool is created.
    pub headless: bool,
    /// Optional user-agent override.
    // NOTE(review): not read by the code visible in this file — confirm usage.
    pub user_agent: Option<String>,
    /// Viewport width; reported under `viewport.width` in navigation metadata.
    pub viewport_width: u32,
    /// Viewport height; reported under `viewport.height` in navigation metadata.
    pub viewport_height: u32,
}
137
138impl Default for BrowserAdapterConfig {
139 fn default() -> Self {
140 Self {
141 timeout: Duration::from_secs(30),
142 max_concurrent: 5,
143 default_wait: WaitStrategy::DomContentLoaded,
144 default_stealth: StealthLevel::Basic,
145 block_resources: true,
146 headless: true,
147 user_agent: None,
148 viewport_width: 1920,
149 viewport_height: 1080,
150 }
151 }
152}
153
154#[derive(Clone)]
166pub struct BrowserAdapter {
167 config: BrowserAdapterConfig,
168}
169
170impl BrowserAdapter {
171 pub fn new() -> Self {
183 Self {
184 config: BrowserAdapterConfig::default(),
185 }
186 }
187
188 pub const fn with_config(config: BrowserAdapterConfig) -> Self {
204 Self { config }
205 }
206
207 fn resolve_timeout(&self, params: &Value) -> Duration {
209 params
210 .get("timeout_ms")
211 .and_then(Value::as_u64)
212 .map_or(self.config.timeout, Duration::from_millis)
213 }
214
215 #[allow(clippy::option_if_let_else)]
221 #[cfg(feature = "browser")]
222 async fn navigate_with_browser(
223 &self,
224 url: &str,
225 wait: &WaitStrategy,
226 timeout: Duration,
227 ) -> Result<(String, Value)> {
228 use stygian_browser::page::WaitUntil;
229 use stygian_browser::{BrowserConfig, BrowserPool};
230
231 let start = Instant::now();
232
233 let browser_config = BrowserConfig {
235 headless: self.config.headless,
236 ..BrowserConfig::default()
237 };
238
239 let pool = BrowserPool::new(browser_config)
241 .await
242 .map_err(|e| StygianError::Service(ServiceError::Unavailable(e.to_string())))?;
243
244 let handle = match tokio::time::timeout(timeout, pool.acquire()).await {
246 Ok(Ok(h)) => h,
247 Ok(Err(e)) => {
248 return Err(StygianError::Service(ServiceError::Unavailable(format!(
249 "Browser pool exhausted or unavailable: {e}"
250 ))));
251 }
252 Err(_) => {
253 return Err(StygianError::Service(ServiceError::Unavailable(format!(
254 "Browser acquisition timeout after {timeout:?}"
255 ))));
256 }
257 };
258
259 let Some(instance) = handle.browser() else {
261 return Err(StygianError::Service(ServiceError::Unavailable(
262 "Failed to get browser instance after acquisition".to_string(),
263 )));
264 };
265
266 let mut page = instance
267 .new_page()
268 .await
269 .map_err(|e| StygianError::Service(ServiceError::Unavailable(e.to_string())))?;
270
271 let wait_condition = match wait {
273 WaitStrategy::DomContentLoaded => WaitUntil::DomContentLoaded,
274 WaitStrategy::NetworkIdle => WaitUntil::NetworkIdle,
275 WaitStrategy::SelectorAppears(selector) => WaitUntil::Selector(selector.clone()),
276 WaitStrategy::Fixed(_duration) => WaitUntil::DomContentLoaded, };
278
279 if let Err(e) = page.navigate(url, wait_condition, timeout).await {
281 return Err(StygianError::Service(ServiceError::Unavailable(format!(
282 "Browser navigation failed: {e}"
283 ))));
284 }
285
286 if let WaitStrategy::Fixed(duration) = wait {
288 tokio::time::sleep(*duration).await;
289 }
290
291 let html = page
293 .content()
294 .await
295 .map_err(|e| StygianError::Service(ServiceError::Unavailable(e.to_string())))?;
296
297 let elapsed = start.elapsed();
298
299 Ok((
302 html,
303 json!({
304 "url": url,
305 "navigation_time_ms": elapsed.as_millis(),
306 "wait_strategy": wait.to_string(),
307 "stealth_level": self.config.default_stealth.as_str(),
308 "viewport": {
309 "width": self.config.viewport_width,
310 "height": self.config.viewport_height
311 },
312 "rendered": true,
313 }),
314 ))
315 }
316
317 #[cfg(not(feature = "browser"))]
319 async fn navigate_with_browser(
320 &self,
321 url: &str,
322 _wait: &WaitStrategy,
323 _timeout: Duration,
324 ) -> Result<(String, Value)> {
325 Err(StygianError::Service(ServiceError::Unavailable(format!(
326 "stygian-graph was compiled without the 'browser' feature; \
327 cannot render JavaScript for URL: {url}"
328 ))))
329 }
330}
331
332impl Default for BrowserAdapter {
333 fn default() -> Self {
334 Self::new()
335 }
336}
337
338#[async_trait]
339impl ScrapingService for BrowserAdapter {
340 async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput> {
364 let wait = WaitStrategy::from_params(&input.params);
365 let _stealth = StealthLevel::from_params(&input.params);
366 let timeout = self.resolve_timeout(&input.params);
367
368 let (html, metadata) = tokio::time::timeout(
369 timeout + Duration::from_secs(5), self.navigate_with_browser(&input.url, &wait, timeout),
371 )
372 .await
373 .map_err(|_| {
374 StygianError::Service(ServiceError::Timeout(
375 u64::try_from(timeout.as_millis()).unwrap_or(u64::MAX),
376 ))
377 })??;
378
379 Ok(ServiceOutput {
380 data: html,
381 metadata,
382 })
383 }
384
385 fn name(&self) -> &'static str {
386 "browser"
387 }
388}
389
// Unit tests for parameter parsing and configuration, plus ignored
// integration tests that need a real browser (and, for some, network access).
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::redundant_closure_for_method_calls
)]
mod tests {
    use super::*;

    #[test]
    fn test_adapter_default_name() {
        let adapter = BrowserAdapter::new();
        assert_eq!(adapter.name(), "browser");
    }

    #[test]
    fn test_wait_strategy_from_params_dom() {
        let params = json!({ "wait_strategy": "dom_content_loaded" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::DomContentLoaded
        );
    }

    #[test]
    fn test_wait_strategy_from_params_network_idle() {
        let params = json!({ "wait_strategy": "network_idle" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::NetworkIdle
        );
    }

    #[test]
    fn test_wait_strategy_from_params_selector() {
        let params = json!({ "wait_strategy": "selector:#main-content" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::SelectorAppears("#main-content".to_string())
        );
    }

    #[test]
    fn test_wait_strategy_from_params_fixed_ms() {
        let params = json!({ "wait_ms": 500u64 });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::Fixed(Duration::from_millis(500))
        );
    }

    #[test]
    fn test_stealth_level_from_params() {
        assert_eq!(
            StealthLevel::from_params(&json!({ "stealth_level": "advanced" })),
            StealthLevel::Advanced
        );
        assert_eq!(
            StealthLevel::from_params(&json!({ "stealth_level": "none" })),
            StealthLevel::None
        );
        assert_eq!(StealthLevel::from_params(&json!({})), StealthLevel::Basic);
    }

    #[test]
    fn test_resolve_timeout_override() {
        let adapter = BrowserAdapter::new();
        let params = json!({ "timeout_ms": 5000u64 });
        assert_eq!(
            adapter.resolve_timeout(&params),
            Duration::from_millis(5000)
        );
    }

    #[test]
    fn test_resolve_timeout_default() {
        let adapter = BrowserAdapter::new();
        let params = json!({});
        assert_eq!(adapter.resolve_timeout(&params), Duration::from_secs(30));
    }

    #[test]
    fn test_config_builder() {
        let config = BrowserAdapterConfig {
            timeout: Duration::from_secs(60),
            max_concurrent: 3,
            block_resources: false,
            ..BrowserAdapterConfig::default()
        };
        let adapter = BrowserAdapter::with_config(config);
        assert_eq!(adapter.config.timeout, Duration::from_secs(60));
        assert_eq!(adapter.config.max_concurrent, 3);
    }

    #[allow(clippy::panic)]
    #[tokio::test]
    #[ignore = "requires real Chrome binary"]
    async fn test_execute_returns_service_output_or_unavailable() {
        let adapter = BrowserAdapter::new();
        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({ "wait_strategy": "dom_content_loaded" }),
        };
        match adapter.execute(input).await {
            Ok(output) => {
                assert!(!output.data.is_empty(), "output data should not be empty");
                assert!(output.metadata.is_object());
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted: the adapter reported the browser as unavailable.
            }
            Err(e) => panic!("unexpected error: {e}"),
        }
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_navigates_url() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({
                "wait_strategy": "dom_content_loaded",
                "timeout_ms": 30000
            }),
        };

        let result = adapter.execute(input).await;

        match result {
            Ok(output) => {
                assert!(!output.data.is_empty());
                assert!(
                    output
                        .metadata
                        .get("rendered")
                        .and_then(|v| v.as_bool())
                        .unwrap_or(false)
                );
                assert!(output.metadata.get("navigation_time_ms").is_some());
                assert_eq!(
                    output.metadata.get("url").and_then(|v| v.as_str()),
                    Some("https://example.com")
                );
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted: the adapter reported the browser as unavailable.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }

    #[tokio::test]
    #[ignore = "Requires Chrome installed and network access; may panic if browser unavailable"]
    async fn browser_adapter_respects_timeout() {
        let config = BrowserAdapterConfig {
            timeout: Duration::from_secs(2),
            ..Default::default()
        };
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            // Endpoint that delays its response well beyond the 2 s timeout.
            url: "https://httpbin.org/delay/10".to_string(),
            params: json!({"timeout_ms": 2000}),
        };

        let result = adapter.execute(input).await;

        match result {
            Err(StygianError::Service(ServiceError::Unavailable(msg))) => {
                assert!(
                    msg.contains("timeout")
                        || msg.contains("unavailable")
                        || msg.contains("Chrome")
                        || msg.contains("exhausted")
                );
            }
            Err(StygianError::Service(ServiceError::Timeout(_))) => {
                // Accepted: the outer deadline in `execute` fired.
            }
            Ok(_) => {
                panic!("Expected timeout or unavailable, got success");
            }
            Err(e) => {
                eprintln!("Got acceptable error: {e}");
            }
        }
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary"]
    async fn browser_adapter_invalid_url() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "not-a-valid-url".to_string(),
            params: json!({}),
        };

        let result = adapter.execute(input).await;

        assert!(result.is_err());
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_wait_strategy_selector() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({
                "wait_strategy": "selector:body"
            }),
        };

        match adapter.execute(input).await {
            Ok(output) => {
                assert_eq!(
                    output
                        .metadata
                        .get("wait_strategy")
                        .and_then(|v| v.as_str()),
                    Some("selector_appears(body)")
                );
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted: the adapter reported the browser as unavailable.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_metadata_complete() {
        let config = BrowserAdapterConfig {
            default_stealth: StealthLevel::Advanced,
            user_agent: Some("Mozilla/5.0".to_string()),
            viewport_width: 1440,
            viewport_height: 900,
            ..Default::default()
        };
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({}),
        };

        match adapter.execute(input).await {
            Ok(output) => {
                assert_eq!(
                    output.metadata.get("url").and_then(|v| v.as_str()),
                    Some("https://example.com")
                );
                assert_eq!(
                    output
                        .metadata
                        .get("stealth_level")
                        .and_then(|v| v.as_str()),
                    Some("advanced")
                );
                assert!(output.metadata.get("viewport").is_some());
                assert!(output.metadata.get("navigation_time_ms").is_some());
                let viewport = output.metadata.get("viewport").expect("viewport exists");
                assert_eq!(viewport.get("width").and_then(|v| v.as_u64()), Some(1440));
                assert_eq!(viewport.get("height").and_then(|v| v.as_u64()), Some(900));
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted: the adapter reported the browser as unavailable.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }
}