1use std::{
73 collections::HashMap,
74 sync::{Arc, LazyLock},
75 time::Duration,
76};
77
78use chromiumoxide::Browser;
79use serde::{Deserialize, Serialize};
80use serde_json::{Value, json};
81use tokio::{
82 io::{AsyncBufReadExt, AsyncWriteExt, BufReader},
83 sync::Mutex,
84 task::JoinHandle,
85 time::sleep,
86};
87use tracing::{debug, info};
88use ulid::Ulid;
89
90#[cfg(feature = "mcp-attach")]
91use futures::StreamExt;
92
93use crate::{
94 AcquisitionMode, AcquisitionRequest, AcquisitionResult, AcquisitionRunner, BrowserConfig,
95 BrowserHandle, BrowserPool,
96 behavior::{InteractionLevel, InteractionSimulator},
97 behavior_adapter::{BehaviorInteractionLevel, PolymorphicBehaviorAdapter},
98 config::StealthLevel,
99 error::{BrowserError, Result},
100 page::WaitUntil,
101 session::{SessionSnapshot, restore_session, save_session},
102};
103
/// An incoming JSON-RPC request envelope.
///
/// `dispatch` deserializes each parsed message into this type before routing
/// it by `method`.
#[derive(Debug, Deserialize)]
pub struct JsonRpcRequest {
    /// Protocol version string sent by the client. It is required for
    /// deserialization but its value is not checked by this struct.
    pub jsonrpc: String,
    /// Method name used to route the request (for example `tools/call`).
    pub method: String,
    /// Method parameters; takes `Value`'s default (JSON `null`) when omitted.
    #[serde(default)]
    pub params: Value,
    /// Request identifier echoed back in the response; takes `Value`'s
    /// default (JSON `null`) when the field is absent.
    #[serde(default)]
    pub id: Value,
}
120
/// An outgoing JSON-RPC response envelope.
///
/// The constructors in the `impl` block populate exactly one of `result` and
/// `error`; whichever is `None` is left out of the serialized JSON.
#[derive(Debug, Serialize)]
pub struct JsonRpcResponse {
    /// Protocol version marker; the constructors always set it to `"2.0"`.
    jsonrpc: &'static str,
    /// Success payload, omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    result: Option<Value>,
    /// Failure payload, omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<JsonRpcError>,
    /// Identifier copied from the request this response answers.
    id: Value,
}
131
/// The `error` member of a JSON-RPC response.
#[derive(Debug, Serialize)]
pub struct JsonRpcError {
    /// Numeric error code (for example `-32601` for an unknown method).
    code: i32,
    /// Human-readable description of the failure.
    message: String,
    /// Optional extra detail, omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    data: Option<Value>,
}
140
141impl JsonRpcResponse {
142 const fn ok(id: Value, result: Value) -> Self {
143 Self {
144 jsonrpc: "2.0",
145 result: Some(result),
146 error: None,
147 id,
148 }
149 }
150
151 fn err(id: Value, code: i32, message: impl Into<String>) -> Self {
152 Self {
153 jsonrpc: "2.0",
154 result: None,
155 error: Some(JsonRpcError {
156 code,
157 message: message.into(),
158 data: None,
159 }),
160 id,
161 }
162 }
163
164 fn method_not_found(id: Value, method: &str) -> Self {
165 Self::err(id, -32601, format!("Method not found: {method}"))
166 }
167}
168
/// Per-session state tracked by the MCP server.
///
/// Instances are stored in `McpBrowserServer::sessions`, keyed by the session
/// ID string handed back to the client.
struct McpSession {
    /// Browser handle slot; `browser_acquire` stores the pool-acquired handle
    /// here as `Some(..)`.
    handle: Arc<Mutex<Option<BrowserHandle>>>,
    /// Browser slot that `browser_acquire` initialises to `None`.
    /// NOTE(review): presumably filled by the attach workflow — confirm.
    attached_browser: Arc<Mutex<Option<Browser>>>,
    /// Task slot that `browser_acquire` initialises to `None`.
    /// NOTE(review): presumably the event-handler task of an attached
    /// browser — confirm.
    attached_handler_task: Arc<Mutex<Option<JoinHandle<()>>>>,
    /// Page slot for the session; starts out as `None` after `browser_acquire`.
    page: Arc<Mutex<Option<crate::page::PageHandle>>>,
    /// Stealth level requested when the session was acquired.
    stealth_level: StealthLevel,
    /// `tls_profile` argument supplied at acquire time, if any.
    tls_profile: Option<String>,
    /// `webrtc_policy` argument supplied at acquire time, if any.
    webrtc_policy: Option<String>,
    /// `cdp_fix_mode` argument supplied at acquire time, if any.
    cdp_fix_mode: Option<String>,
    /// `proxy` argument supplied at acquire time, if any.
    proxy: Option<String>,
    /// Target tuning profile; `browser_acquire` stores either `"default"` or
    /// `"reddit"`.
    target_profile: String,
    /// URL most recently recorded for the session; `None` until a tool sets it.
    current_url: Option<String>,
    /// Session snapshot slot; `None` when the session is created.
    /// NOTE(review): presumably the in-memory snapshot used by the session
    /// save/restore tools — confirm.
    saved_snapshot: Option<SessionSnapshot>,
    /// Attach endpoint slot; `None` for sessions created by `browser_acquire`.
    attach_endpoint: Option<String>,
    /// Applied behavior plan slot; `None` when the session is created.
    behavior_plan: Option<crate::behavior_adapter::AppliedBehaviorPlan>,
}
206
207static TOOL_DEFINITIONS: LazyLock<Vec<Value>> = LazyLock::new(|| {
226 let mut tools = vec![
227 json!({
228 "name": "browser_acquire",
229 "description": "Acquire a browser from the pool and open a session. The optional parameters are stored as session metadata labels and echoed back in the response; they do not reconfigure the pool-acquired browser at runtime. Use them to annotate sessions (e.g. for `browser_verify_stealth` attribution).",
230 "inputSchema": {
231 "type": "object",
232 "properties": {
233 "stealth_level": {
234 "type": "string",
235 "enum": ["none", "basic", "advanced"],
236 "description": "Anti-detection intensity. Defaults to 'advanced'."
237 },
238 "tls_profile": {
239 "type": "string",
240 "description": "TLS fingerprint profile label (free-form; requires stealth feature; browser-launch-level). Examples: chrome131, firefox133, safari18, edge131."
241 },
242 "webrtc_policy": {
243 "type": "string",
244 "description": "WebRTC IP-leak policy label (free-form; requires stealth feature; browser-launch-level). Examples: allow_all, disable_non_proxied, block_all."
245 },
246 "cdp_fix_mode": {
247 "type": "string",
248 "enum": ["addBinding", "isolatedWorld", "enableDisable", "none"],
249 "description": "CDP Runtime.enable leak-mitigation mode."
250 },
251 "proxy": {
252 "type": "string",
253 "description": "HTTP/SOCKS proxy URL, e.g. 'http://user:pass@host:port'. Only pass this when the user has explicitly requested proxy use or you have already acquired a proxy via proxy_acquire. Do NOT populate this field by default."
254 },
255 "target_profile": {
256 "type": "string",
257 "enum": ["default", "reddit"],
258 "description": "Optional target tuning profile. 'reddit' enables challenge-aware waits and stabilization tuned for Reddit flows."
259 }
260 },
261 "required": []
262 }
263 }),
264 json!({
265 "name": "browser_navigate",
266 "description": "Navigate to a URL within a session. Opens a new page if needed.",
267 "inputSchema": {
268 "type": "object",
269 "properties": {
270 "session_id": { "type": "string" },
271 "url": { "type": "string" },
272 "timeout_secs": { "type": "integer", "default": 30 }
273 },
274 "required": ["session_id", "url"]
275 }
276 }),
277 json!({
278 "name": "browser_acquire_and_extract",
279 "description": "Run the opinionated acquisition ladder and return structured extraction/content output in one call. Uses AcquisitionRunner facade with deterministic strategy escalation.",
280 "inputSchema": {
281 "type": "object",
282 "properties": {
283 "url": { "type": "string", "description": "Target URL to acquire." },
284 "mode": {
285 "type": "string",
286 "enum": ["fast", "resilient", "hostile", "investigate"],
287 "description": "Acquisition ladder mode."
288 },
289 "wait_for_selector": {
290 "type": "string",
291 "description": "Optional selector wait gate for browser-stage success."
292 },
293 "selector_wait": {
294 "type": "string",
295 "description": "Alias for wait_for_selector."
296 },
297 "extraction_js": {
298 "type": "string",
299 "description": "Optional JavaScript extraction expression evaluated in browser stages."
300 },
301 "total_timeout_secs": {
302 "type": "number",
303 "default": 45,
304 "description": "Optional wall-clock timeout for the full acquisition run."
305 }
306 },
307 "required": ["url", "mode"]
308 }
309 }),
310 json!({
311 "name": "browser_eval",
312 "description": "Evaluate JavaScript in the current page of a session.",
313 "inputSchema": {
314 "type": "object",
315 "properties": {
316 "session_id": { "type": "string" },
317 "script": { "type": "string" }
318 },
319 "required": ["session_id", "script"]
320 }
321 }),
322 json!({
323 "name": "browser_screenshot",
324 "description": "Capture a full-page PNG screenshot. Returns base64-encoded PNG.",
325 "inputSchema": {
326 "type": "object",
327 "properties": {
328 "session_id": { "type": "string" }
329 },
330 "required": ["session_id"]
331 }
332 }),
333 json!({
334 "name": "browser_content",
335 "description": "Get the full HTML content of the current page.",
336 "inputSchema": {
337 "type": "object",
338 "properties": {
339 "session_id": { "type": "string" }
340 },
341 "required": ["session_id"]
342 }
343 }),
344 #[cfg(feature = "mcp-attach")]
345 json!({
346 "name": "browser_attach",
347 "description": "Attach MCP workflows to an existing user browser/profile context. `cdp_ws` mode is implemented and creates a live attached session; `extension_bridge` remains a contract-only path.",
348 "inputSchema": {
349 "type": "object",
350 "properties": {
351 "mode": {
352 "type": "string",
353 "enum": ["extension_bridge", "cdp_ws"],
354 "description": "Attach strategy. extension_bridge is the recommended future path for existing user profiles. cdp_ws targets a remote debugging websocket endpoint."
355 },
356 "endpoint": {
357 "type": "string",
358 "description": "Optional endpoint for cdp_ws mode, e.g. ws://127.0.0.1:9222/devtools/browser/<id>."
359 },
360 "profile_hint": {
361 "type": "string",
362 "description": "Optional human-readable profile label (e.g. 'reddit-main')."
363 },
364 "target_profile": {
365 "type": "string",
366 "enum": ["default", "reddit"],
367 "description": "Optional target tuning profile used by session navigation helpers."
368 }
369 },
370 "required": ["mode"]
371 }
372 }),
373 json!({
374 "name": "browser_auth_session",
375 "description": "High-level auth/session workflow wrapper. Use mode='capture' to persist login state and mode='resume' to restore it.",
376 "inputSchema": {
377 "type": "object",
378 "properties": {
379 "session_id": { "type": "string" },
380 "mode": { "type": "string", "enum": ["capture", "resume"] },
381 "file_path": { "type": "string", "description": "Optional snapshot file path for durable persistence." },
382 "ttl_secs": { "type": "integer", "description": "Optional TTL (seconds) when capturing." },
383 "navigate_to_origin": { "type": "boolean", "default": true, "description": "When resuming, navigate to snapshot origin before restore." },
384 "interaction_level": { "type": "string", "enum": ["none", "low", "medium", "high"], "default": "none", "description": "Optional post-operation human-like interaction step." }
385 },
386 "required": ["session_id", "mode"]
387 }
388 }),
389 json!({
390 "name": "browser_release",
391 "description": "Release a browser session back to the pool.",
392 "inputSchema": {
393 "type": "object",
394 "properties": {
395 "session_id": { "type": "string" }
396 },
397 "required": ["session_id"]
398 }
399 }),
400 json!({
401 "name": "pool_stats",
402 "description": "Return current browser pool statistics.",
403 "inputSchema": {
404 "type": "object",
405 "properties": {},
406 "required": []
407 }
408 }),
409 ];
410 tools.push(json!({
411 "name": "browser_query",
412 "description": "Navigate to a URL, query all elements matching a CSS selector, and return their text content or specific attributes. If `fields` is omitted each result is a plain string (the text content). If `fields` is supplied each result is an object with one key per field.",
413 "inputSchema": {
414 "type": "object",
415 "properties": {
416 "session_id": { "type": "string" },
417 "url": { "type": "string" },
418 "selector": { "type": "string", "description": "CSS selector passed to querySelectorAll." },
419 "fields": {
420 "type": "object",
421 "description": "Map of output field name → { \"attr\": \"attribute-name\" }. Omit `attr` to get text content for that field.",
422 "additionalProperties": {
423 "type": "object",
424 "properties": { "attr": { "type": "string" } }
425 }
426 },
427 "limit": { "type": "integer", "default": 50, "description": "Maximum number of nodes to return." },
428 "timeout_secs": { "type": "number", "default": 30 }
429 },
430 "required": ["session_id", "url", "selector"]
431 }
432 }));
433 tools.push(json!({
434 "name": "browser_extract",
435 "description": "Navigate to a URL and perform schema-driven structured extraction. Each element matching `root_selector` becomes one result object; fields within each root are resolved by their own sub-selectors relative to the root. This is the runtime equivalent of the `#[derive(Extract)]` macro.",
436 "inputSchema": {
437 "type": "object",
438 "properties": {
439 "session_id": { "type": "string" },
440 "url": { "type": "string" },
441 "root_selector": { "type": "string", "description": "CSS selector whose matches become the root of each result object." },
442 "schema": {
443 "type": "object",
444 "description": "Map of field name → { \"selector\": \"...\", \"attr\": \"...\", \"required\": true/false }.",
445 "additionalProperties": {
446 "type": "object",
447 "properties": {
448 "selector": { "type": "string" },
449 "attr": { "type": "string" },
450 "required": { "type": "boolean", "default": false }
451 },
452 "required": ["selector"]
453 }
454 },
455 "timeout_secs": { "type": "number", "default": 30 }
456 },
457 "required": ["session_id", "url", "root_selector", "schema"]
458 }
459 }));
460 tools.push(json!({
461 "name": "browser_extract_with_fallback",
462 "description": "Like browser_extract but accepts multiple root selectors (tried in order). Returns the first selector that produces results. Useful when a site layout may have changed and you want to try modern markup before falling back to legacy selectors.",
463 "inputSchema": {
464 "type": "object",
465 "properties": {
466 "session_id": { "type": "string" },
467 "url": { "type": "string" },
468 "root_selectors": {
469 "type": "array",
470 "items": { "type": "string" },
471 "description": "CSS selectors tried in order; the first that produces results is used.",
472 "minItems": 1
473 },
474 "schema": {
475 "type": "object",
476 "description": "Map of field name → { \"selector\": \"...\", \"attr\": \"...\", \"required\": true/false }.",
477 "additionalProperties": {
478 "type": "object",
479 "properties": {
480 "selector": { "type": "string" },
481 "attr": { "type": "string" },
482 "required": { "type": "boolean", "default": false }
483 },
484 "required": ["selector"]
485 }
486 },
487 "timeout_secs": { "type": "number", "default": 30 }
488 },
489 "required": ["session_id", "url", "root_selectors", "schema"]
490 }
491 }));
492 tools.push(json!({
493 "name": "browser_extract_resilient",
494 "description": "Like browser_extract but skips root nodes where *all* required schema fields are absent (partial records). Useful for heterogeneous lists where some items lack an optional field.",
495 "inputSchema": {
496 "type": "object",
497 "properties": {
498 "session_id": { "type": "string" },
499 "url": { "type": "string" },
500 "root_selector": { "type": "string", "description": "CSS selector whose matches become the root of each result object." },
501 "schema": {
502 "type": "object",
503 "description": "Map of field name → { \"selector\": \"...\", \"attr\": \"...\", \"required\": true/false }.",
504 "additionalProperties": {
505 "type": "object",
506 "properties": {
507 "selector": { "type": "string" },
508 "attr": { "type": "string" },
509 "required": { "type": "boolean", "default": false }
510 },
511 "required": ["selector"]
512 }
513 },
514 "timeout_secs": { "type": "number", "default": 30 }
515 },
516 "required": ["session_id", "url", "root_selector", "schema"]
517 }
518 }));
519 #[cfg(feature = "similarity")]
521 tools.push(json!({
522 "name": "browser_find_similar",
523 "description": "Navigate to a URL and find DOM elements that are structurally similar to a reference element (identified by a CSS selector). Useful when a site has been redesigned and stored selectors no longer match. Requires the `similarity` feature.",
524 "inputSchema": {
525 "type": "object",
526 "properties": {
527 "session_id": { "type": "string" },
528 "url": { "type": "string" },
529 "reference_selector": { "type": "string", "description": "CSS selector identifying the reference node. The first match is used." },
530 "threshold": { "type": "number", "default": 0.7, "description": "Minimum similarity score [0.0, 1.0]." },
531 "max_results": { "type": "integer", "default": 10 },
532 "timeout_secs": { "type": "number", "default": 30 }
533 },
534 "required": ["session_id", "url", "reference_selector"]
535 }
536 }));
537 #[cfg(feature = "stealth")]
539 tools.push(json!({
540 "name": "browser_verify_stealth",
541 "description": "Navigate to a URL and run built-in stealth checks with optional transport diagnostics (JA3/JA4/HTTP3). Returns a DiagnosticReport with pass/fail results, coverage percentage, transport mismatch details, and known_limitations for visible-but-not-yet-covered surfaces.",
542 "inputSchema": {
543 "type": "object",
544 "properties": {
545 "session_id": { "type": "string" },
546 "url": { "type": "string", "description": "URL to navigate to before running checks." },
547 "timeout_secs": { "type": "integer", "default": 15, "description": "Navigation timeout in seconds." },
548 "observed_ja3_hash": { "type": "string", "description": "Optional observed JA3 hash to compare against expected profile." },
549 "observed_ja4": { "type": "string", "description": "Optional observed JA4 fingerprint to compare against expected profile." },
550 "observed_http3_perk_text": { "type": "string", "description": "Optional observed HTTP/3 perk text (SETTINGS|PSEUDO_HEADERS)." },
551 "observed_http3_perk_hash": { "type": "string", "description": "Optional observed HTTP/3 perk hash." }
552 },
553 "required": ["session_id", "url"]
554 }
555 }));
556 #[cfg(feature = "stealth")]
558 tools.push(json!({
559 "name": "browser_validate_stealth",
560 "description": "Run anti-bot service validators against the pool (Tier 1: CreepJS, BrowserScan). Returns a summary report.",
561 "inputSchema": {
562 "type": "object",
563 "properties": {
564 "targets": {
565 "type": "array",
566 "items": { "type": "string", "enum": ["creepjs", "browserscan", "fingerprint_js", "kasada", "cloudflare", "akamai", "data_dome", "perimeter_x"] },
567 "description": "List of services to validate. Empty = Tier 1 only. Tier 2+ tests may rate-limit.",
568 "default": ["creepjs", "browserscan"]
569 },
570 "tier1_only": {
571 "type": "boolean",
572 "default": false,
573 "description": "If true, force regression-safe Tier 1 targets only (CreepJS + BrowserScan)."
574 },
575 "timeout_secs": { "type": "integer", "default": 30, "description": "Per-target timeout in seconds." }
576 },
577 "required": []
578 }
579 }));
580 tools.push(json!({
582 "name": "browser_warmup",
583 "description": "Warm up a browser session by navigating to a URL and optionally waiting for dynamic resources to settle. Warmup is idempotent — calling it again re-warms the same session.",
584 "inputSchema": {
585 "type": "object",
586 "properties": {
587 "session_id": { "type": "string" },
588 "url": { "type": "string", "description": "URL to navigate to during warmup." },
589 "wait": {
590 "type": "string",
591 "enum": ["dom_content_loaded", "network_idle"],
592 "default": "dom_content_loaded",
593 "description": "Wait strategy after navigation."
594 },
595 "timeout_ms": { "type": "integer", "default": 30000, "description": "Navigation timeout in milliseconds." },
596 "stabilize_ms": { "type": "integer", "default": 0, "description": "Additional pause after navigation for dynamic resources to settle (0 = skip)." }
597 },
598 "required": ["session_id", "url"]
599 }
600 }));
601 tools.push(json!({
602 "name": "browser_refresh",
603 "description": "Refresh the current page while retaining cookies and session storage. Optionally re-navigates to force a new TCP connection.",
604 "inputSchema": {
605 "type": "object",
606 "properties": {
607 "session_id": { "type": "string" },
608 "wait": {
609 "type": "string",
610 "enum": ["dom_content_loaded", "network_idle"],
611 "default": "dom_content_loaded",
612 "description": "Wait strategy after reload."
613 },
614 "timeout_ms": { "type": "integer", "default": 30000, "description": "Reload timeout in milliseconds." },
615 "reset_connection": { "type": "boolean", "default": false, "description": "When true, re-navigates to force a new TCP connection instead of in-place reload." }
616 },
617 "required": ["session_id"]
618 }
619 }));
620 tools.push(json!({
621 "name": "browser_session_save",
622 "description": "Save current browser session state (cookies + localStorage) to memory and optionally to disk.",
623 "inputSchema": {
624 "type": "object",
625 "properties": {
626 "session_id": { "type": "string" },
627 "ttl_secs": { "type": "integer", "description": "Optional snapshot TTL in seconds." },
628 "file_path": { "type": "string", "description": "Optional path to save session snapshot JSON." },
629 "include_snapshot": { "type": "boolean", "default": false, "description": "When true, include full snapshot payload in response." }
630 },
631 "required": ["session_id"]
632 }
633 }));
634 tools.push(json!({
635 "name": "browser_session_restore",
636 "description": "Restore browser session state from provided snapshot JSON, saved in-memory snapshot, or file.",
637 "inputSchema": {
638 "type": "object",
639 "properties": {
640 "session_id": { "type": "string" },
641 "snapshot": { "type": "object", "description": "Inline SessionSnapshot JSON." },
642 "file_path": { "type": "string", "description": "Path to a SessionSnapshot JSON file." },
643 "use_saved": { "type": "boolean", "default": true, "description": "Use in-memory snapshot when no inline/file snapshot is provided." },
644 "navigate_to_origin": { "type": "boolean", "default": true, "description": "Navigate to snapshot origin before restore when origin is present." }
645 },
646 "required": ["session_id"]
647 }
648 }));
649 tools.push(json!({
650 "name": "browser_humanize",
651 "description": "Apply human-like interaction sequence on current page (scroll, key activity, mouse movement).",
652 "inputSchema": {
653 "type": "object",
654 "properties": {
655 "session_id": { "type": "string" },
656 "level": { "type": "string", "enum": ["none", "low", "medium", "high"], "default": "low" },
657 "viewport_width": { "type": "number", "default": 1366.0 },
658 "viewport_height": { "type": "number", "default": 768.0 }
659 },
660 "required": ["session_id"]
661 }
662 }));
663 tools.push(json!({
664 "name": "browser_apply_behavior_json",
665 "description": "Apply structured behavior JSON (runtime policy, investigation bundle, or direct overrides) using the polymorphic behavior adapter. Returns an applied plan and effective browser config. If session_id is provided, session metadata is updated for downstream tools.",
666 "inputSchema": {
667 "type": "object",
668 "properties": {
669 "behavior": {
670 "type": "object",
671 "description": "Structured behavior input: RuntimePolicy object, InvestigationBundle object with nested policy, or direct override object."
672 },
673 "session_id": {
674 "type": "string",
675 "description": "Optional active session to annotate with the applied behavior plan."
676 }
677 },
678 "required": ["behavior"]
679 }
680 }));
681 tools
682});
683
/// MCP server that exposes a [`BrowserPool`] as JSON-RPC tools.
pub struct McpBrowserServer {
    /// Shared pool that browsers are acquired from.
    pool: Arc<BrowserPool>,
    /// Active sessions keyed by the session ID string returned to clients.
    sessions: Arc<Mutex<HashMap<String, McpSession>>>,
}
688
/// One field definition for schema-driven extraction.
///
/// NOTE(review): presumably parsed from the `schema` argument of the
/// `browser_extract*` tools, whose entries carry the same three keys —
/// confirm against the parsing code.
struct ExtractFieldDef {
    /// Selector for the field.
    selector: String,
    /// Optional attribute name for the field.
    attr: Option<String>,
    /// Whether the field is marked as required.
    required: bool,
}
695
696impl McpBrowserServer {
697 #[must_use]
701 pub fn new(pool: Arc<BrowserPool>) -> Self {
702 Self {
703 pool,
704 sessions: Arc::new(Mutex::new(HashMap::new())),
705 }
706 }
707
708 pub async fn run(&self) -> Result<()> {
717 info!("MCP browser server starting (stdin/stdout mode)");
718
719 let stdin = tokio::io::stdin();
720 let stdout = tokio::io::stdout();
721 let mut reader = BufReader::new(stdin).lines();
722 let mut stdout = stdout;
723
724 while let Some(line) = reader.next_line().await.map_err(BrowserError::Io)? {
725 let line = line.trim().to_string();
726 if line.is_empty() {
727 continue;
728 }
729
730 debug!(?line, "MCP request");
731
732 let response = match serde_json::from_str::<Value>(&line) {
733 Ok(req) => {
734 let is_well_formed_notification = req.is_object()
735 && req.get("jsonrpc").and_then(Value::as_str) == Some("2.0")
736 && req.get("id").is_none()
737 && req.get("method").and_then(Value::as_str).is_some();
738 let response = self.dispatch(&req).await;
739 if is_well_formed_notification {
740 continue;
741 }
742 response
743 }
744 Err(e) => serde_json::to_value(JsonRpcResponse::err(
745 Value::Null,
746 -32700,
747 format!("Parse error: {e}"),
748 ))
749 .unwrap_or_else(|_| {
750 json!({"jsonrpc":"2.0","id":null,"error":{"code":-32603,"message":"Internal error"}})
751 }),
752 };
753
754 let mut out = serde_json::to_string(&response).unwrap_or_default();
755 out.push('\n');
756 stdout
757 .write_all(out.as_bytes())
758 .await
759 .map_err(BrowserError::Io)?;
760 stdout.flush().await.map_err(BrowserError::Io)?;
761 }
762
763 info!("MCP browser server stopping (stdin closed)");
764 Ok(())
765 }
766
767 pub async fn dispatch(&self, req: &Value) -> Value {
790 let typed: JsonRpcRequest = match serde_json::from_value(req.clone()) {
791 Ok(r) => r,
792 Err(e) => {
793 return json!({
794 "jsonrpc": "2.0",
795 "id": req.get("id").cloned().unwrap_or(Value::Null),
796 "error": { "code": -32700, "message": format!("Parse error: {e}") }
797 });
798 }
799 };
800 let resp = self.handle_request(typed).await;
801 serde_json::to_value(resp).unwrap_or_else(|_| json!({"jsonrpc":"2.0","id":null,"error":{"code":-32603,"message":"Internal error"}}))
802 }
803
804 async fn handle_request(&self, req: JsonRpcRequest) -> JsonRpcResponse {
805 let id = req.id.clone();
806 match req.method.as_str() {
807 "initialize" => Self::handle_initialize(id),
808 "tools/list" => Self::handle_tools_list(id),
809 "tools/call" => self.handle_tools_call(id, req.params).await,
810 "resources/list" => self.handle_resources_list(id).await,
811 "resources/read" => self.handle_resources_read(id, req.params).await,
812 "notifications/initialized" | "ping" => {
813 JsonRpcResponse::ok(id, json!({}))
815 }
816 other => JsonRpcResponse::method_not_found(id, other),
817 }
818 }
819
820 fn handle_initialize(id: Value) -> JsonRpcResponse {
823 JsonRpcResponse::ok(
824 id,
825 json!({
826 "protocolVersion": "2025-11-25",
827 "capabilities": {
828 "tools": { "listChanged": false },
829 "resources": { "listChanged": false, "subscribe": false }
830 },
831 "serverInfo": {
832 "name": "stygian-browser",
833 "version": env!("CARGO_PKG_VERSION")
834 }
835 }),
836 )
837 }
838
839 fn handle_tools_list(id: Value) -> JsonRpcResponse {
842 JsonRpcResponse::ok(id, json!({ "tools": &*TOOL_DEFINITIONS }))
843 }
844
845 async fn handle_tools_call(&self, id: Value, params: Value) -> JsonRpcResponse {
848 let name = match params.get("name").and_then(|v| v.as_str()) {
849 Some(n) => n.to_string(),
850 None => return JsonRpcResponse::err(id, -32602, "Missing tool 'name'"),
851 };
852 let args = params
853 .get("arguments")
854 .cloned()
855 .unwrap_or_else(|| json!({}));
856
857 let result = match name.as_str() {
858 "browser_acquire" => self.tool_browser_acquire(&args).await,
859 "browser_acquire_and_extract" => self.tool_browser_acquire_and_extract(&args).await,
860 "browser_navigate" => self.tool_browser_navigate(&args).await,
861 "browser_eval" => self.tool_browser_eval(&args).await,
862 "browser_screenshot" => self.tool_browser_screenshot(&args).await,
863 "browser_content" => self.tool_browser_content(&args).await,
864 #[cfg(feature = "mcp-attach")]
865 "browser_attach" => self.tool_browser_attach(&args).await,
866 #[cfg(not(feature = "mcp-attach"))]
867 "browser_attach" => Err(BrowserError::ConfigError(
868 "browser_attach requires the 'mcp-attach' feature".to_string(),
869 )),
870 "browser_auth_session" => self.tool_browser_auth_session(&args).await,
871 "browser_session_save" => self.tool_browser_session_save(&args).await,
872 "browser_session_restore" => self.tool_browser_session_restore(&args).await,
873 "browser_apply_behavior_json" => self.tool_browser_apply_behavior_json(&args).await,
874 "browser_humanize" => self.tool_browser_humanize(&args).await,
875 #[cfg(feature = "stealth")]
876 "browser_verify_stealth" => self.tool_browser_verify_stealth(&args).await,
877 #[cfg(not(feature = "stealth"))]
878 "browser_verify_stealth" => Err(BrowserError::ConfigError(
879 "browser_verify_stealth requires the 'stealth' feature".to_string(),
880 )),
881 #[cfg(feature = "stealth")]
882 "browser_validate_stealth" => self.tool_browser_validate_stealth(&args).await,
883 #[cfg(not(feature = "stealth"))]
884 "browser_validate_stealth" => Err(BrowserError::ConfigError(
885 "browser_validate_stealth requires the 'stealth' feature".to_string(),
886 )),
887 "browser_release" => self.tool_browser_release(&args).await,
888 "pool_stats" => Ok(self.tool_pool_stats()),
889 "browser_query" => self.tool_browser_query(&args).await,
890 "browser_extract" => self.tool_browser_extract(&args).await,
891 "browser_extract_with_fallback" => self.tool_browser_extract_with_fallback(&args).await,
892 "browser_extract_resilient" => self.tool_browser_extract_resilient(&args).await,
893 #[cfg(feature = "similarity")]
894 "browser_find_similar" => self.tool_browser_find_similar(&args).await,
895 "browser_warmup" => self.tool_browser_warmup(&args).await,
896 "browser_refresh" => self.tool_browser_refresh(&args).await,
897 other => Err(BrowserError::ConfigError(format!("Unknown tool: {other}"))),
898 };
899
900 match result {
901 Ok(content) => JsonRpcResponse::ok(
902 id,
903 json!({ "content": [{ "type": "text", "text": content.to_string() }], "isError": false }),
904 ),
905 Err(e) => JsonRpcResponse::ok(
906 id,
907 json!({ "content": [{ "type": "text", "text": e.to_string() }], "isError": true }),
908 ),
909 }
910 }
911
912 async fn tool_browser_acquire(&self, args: &Value) -> Result<Value> {
913 let stealth_level = args
915 .get("stealth_level")
916 .and_then(|v| v.as_str())
917 .map(|s| match s {
918 "none" => StealthLevel::None,
919 "basic" => StealthLevel::Basic,
920 _ => StealthLevel::Advanced,
921 })
922 .unwrap_or_default();
923 let tls_profile = args
924 .get("tls_profile")
925 .and_then(|v| v.as_str())
926 .map(ToString::to_string);
927 let webrtc_policy = args
928 .get("webrtc_policy")
929 .and_then(|v| v.as_str())
930 .map(ToString::to_string);
931 let cdp_fix_mode = args
932 .get("cdp_fix_mode")
933 .and_then(|v| v.as_str())
934 .map(ToString::to_string);
935 let proxy = args
936 .get("proxy")
937 .and_then(|v| v.as_str())
938 .map(ToString::to_string);
939 let target_profile = args
940 .get("target_profile")
941 .and_then(|v| v.as_str())
942 .map_or_else(
943 || "default".to_string(),
944 |s| {
945 if s.eq_ignore_ascii_case("reddit") {
946 "reddit".to_string()
947 } else {
948 "default".to_string()
949 }
950 },
951 );
952
953 let handle = self.pool.acquire().await?;
954 let session_id = Ulid::new().to_string();
955
956 let effective_stealth = format!("{stealth_level:?}").to_lowercase();
957 self.sessions.lock().await.insert(
958 session_id.clone(),
959 McpSession {
960 handle: Arc::new(Mutex::new(Some(handle))),
961 attached_browser: Arc::new(Mutex::new(None)),
962 attached_handler_task: Arc::new(Mutex::new(None)),
963 page: Arc::new(Mutex::new(None)),
964 stealth_level,
965 tls_profile: tls_profile.clone(),
966 webrtc_policy: webrtc_policy.clone(),
967 cdp_fix_mode: cdp_fix_mode.clone(),
968 proxy: proxy.clone(),
969 target_profile: target_profile.clone(),
970 current_url: None,
971 saved_snapshot: None,
972 attach_endpoint: None,
973 behavior_plan: None,
974 },
975 );
976
977 info!(%session_id, %effective_stealth, "MCP session acquired");
978 Ok(json!({
979 "session_id": session_id,
980 "requested_metadata": {
981 "stealth_level": effective_stealth,
982 "tls_profile": tls_profile,
983 "webrtc_policy": webrtc_policy,
984 "cdp_fix_mode": cdp_fix_mode,
985 "proxy": proxy,
986 "target_profile": target_profile
987 }
988 }))
989 }
990
991 async fn tool_browser_acquire_and_extract(&self, args: &Value) -> Result<Value> {
992 let request = Self::parse_acquisition_request(args)?;
993 let runner = AcquisitionRunner::new(self.pool.clone());
994 let result = runner.run(request).await;
995 Ok(Self::acquisition_result_to_tool_output(&result))
996 }
997
998 #[cfg(feature = "stealth")]
999 async fn tool_browser_verify_stealth(&self, args: &Value) -> Result<Value> {
1000 let session_id = Self::require_str(args, "session_id")?;
1001 let url = Self::require_str(args, "url")?;
1002 let timeout_secs = args
1003 .get("timeout_secs")
1004 .and_then(serde_json::Value::as_u64)
1005 .unwrap_or(15);
1006 let observed = crate::diagnostic::TransportObservations {
1007 ja3_hash: args
1008 .get("observed_ja3_hash")
1009 .and_then(serde_json::Value::as_str)
1010 .map(ToString::to_string),
1011 ja4: args
1012 .get("observed_ja4")
1013 .and_then(serde_json::Value::as_str)
1014 .map(ToString::to_string),
1015 http3_perk_text: args
1016 .get("observed_http3_perk_text")
1017 .and_then(serde_json::Value::as_str)
1018 .map(ToString::to_string),
1019 http3_perk_hash: args
1020 .get("observed_http3_perk_hash")
1021 .and_then(serde_json::Value::as_str)
1022 .map(ToString::to_string),
1023 };
1024
1025 let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
1026 self.session_runtime(&session_id).await?;
1027 let requested_stealth = self.session_handle_and_stealth(&session_id).await?.1;
1028
1029 self.ensure_session_page(
1030 &session_id,
1031 &session_arc,
1032 &attached_browser_arc,
1033 &page_arc,
1034 None,
1035 Duration::from_secs(timeout_secs),
1036 reddit_profile,
1037 )
1038 .await?;
1039
1040 {
1041 let mut page_guard = page_arc.lock().await;
1042 let page = page_guard.as_mut().ok_or_else(|| {
1043 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1044 })?;
1045 Self::navigate_with_profile(
1046 page,
1047 &url,
1048 Duration::from_secs(timeout_secs),
1049 reddit_profile,
1050 )
1051 .await?;
1052 drop(page_guard);
1053 }
1054
1055 let mut result = {
1056 let page_guard = page_arc.lock().await;
1057 let page = page_guard.as_ref().ok_or_else(|| {
1058 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1059 })?;
1060 let result = Self::run_stealth_diagnostic(page, observed).await;
1061 drop(page_guard);
1062 result
1063 };
1064
1065 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
1066 session.current_url = Some(url.clone());
1067 }
1068
1069 if let Ok(ref mut v) = result
1071 && let Some(obj) = v.as_object_mut()
1072 {
1073 obj.insert(
1074 "requested_stealth_level".to_string(),
1075 Value::String(requested_stealth),
1076 );
1077 }
1078 result
1079 }
1080
/// Runs the page's stealth verification with the supplied transport
/// observations and converts the report to JSON.
///
/// # Errors
///
/// Propagates the verification error, and maps a serialization failure to a
/// `ConfigError`.
#[cfg(feature = "stealth")]
async fn run_stealth_diagnostic(
    page: &crate::page::PageHandle,
    observed: crate::diagnostic::TransportObservations,
) -> Result<Value> {
    let report = page.verify_stealth_with_transport(Some(observed)).await?;
    match serde_json::to_value(&report) {
        Ok(value) => Ok(value),
        Err(e) => Err(BrowserError::ConfigError(format!(
            "failed to serialize report: {e}"
        ))),
    }
}
1090
1091 async fn tool_browser_navigate(&self, args: &Value) -> Result<Value> {
1092 let session_id = Self::require_str(args, "session_id")?;
1093 let url = Self::require_str(args, "url")?;
1094 let timeout_secs = args
1095 .get("timeout_secs")
1096 .and_then(serde_json::Value::as_f64)
1097 .unwrap_or(30.0);
1098
1099 let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
1100 self.session_runtime(&session_id).await?;
1101
1102 self.ensure_session_page(
1103 &session_id,
1104 &session_arc,
1105 &attached_browser_arc,
1106 &page_arc,
1107 None,
1108 Duration::from_secs_f64(timeout_secs),
1109 reddit_profile,
1110 )
1111 .await?;
1112
1113 let (challenge_detected, challenge_cleared, title) = {
1114 let mut page_guard = page_arc.lock().await;
1115 let page = page_guard.as_mut().ok_or_else(|| {
1116 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1117 })?;
1118
1119 let (challenge_detected, challenge_cleared) = Self::navigate_with_profile(
1120 page,
1121 &url,
1122 Duration::from_secs_f64(timeout_secs),
1123 reddit_profile,
1124 )
1125 .await?;
1126 let title = page.title().await.unwrap_or_default();
1127 drop(page_guard);
1128 (challenge_detected, challenge_cleared, title)
1129 };
1130
1131 let current_url = url.clone();
1132
1133 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
1136 session.current_url = Some(current_url.clone());
1137 }
1138
1139 Ok(json!({
1140 "title": title,
1141 "url": current_url,
1142 "challenge_detected": challenge_detected,
1143 "challenge_cleared": challenge_cleared
1144 }))
1145 }
1146
1147 async fn tool_browser_eval(&self, args: &Value) -> Result<Value> {
1148 let session_id = Self::require_str(args, "session_id")?;
1149 let script = Self::require_str(args, "script")?;
1150 let timeout_secs = args
1151 .get("timeout_secs")
1152 .and_then(serde_json::Value::as_f64)
1153 .unwrap_or(30.0);
1154
1155 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1156 self.session_runtime(&session_id).await?;
1157 let nav_url = nav_url_opt.ok_or_else(|| {
1158 BrowserError::ConfigError(
1159 "No page loaded — call browser_navigate before browser_eval".to_string(),
1160 )
1161 })?;
1162
1163 self.ensure_session_page(
1164 &session_id,
1165 &session_arc,
1166 &attached_browser_arc,
1167 &page_arc,
1168 Some(nav_url.as_str()),
1169 Duration::from_secs_f64(timeout_secs),
1170 reddit_profile,
1171 )
1172 .await?;
1173
1174 let mut page_guard = page_arc.lock().await;
1175 let page = page_guard.as_mut().ok_or_else(|| {
1176 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1177 })?;
1178 let result: Value = page.eval(&script).await?;
1179 drop(page_guard);
1180
1181 Ok(json!({ "result": result }))
1182 }
1183
1184 async fn tool_browser_screenshot(&self, args: &Value) -> Result<Value> {
1185 use base64::Engine as _;
1186 let session_id = Self::require_str(args, "session_id")?;
1187 let timeout_secs = args
1188 .get("timeout_secs")
1189 .and_then(serde_json::Value::as_f64)
1190 .unwrap_or(30.0);
1191
1192 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1193 self.session_runtime(&session_id).await?;
1194 let nav_url = nav_url_opt.ok_or_else(|| {
1195 BrowserError::ConfigError(
1196 "No page loaded — call browser_navigate before browser_screenshot".to_string(),
1197 )
1198 })?;
1199
1200 self.ensure_session_page(
1201 &session_id,
1202 &session_arc,
1203 &attached_browser_arc,
1204 &page_arc,
1205 Some(nav_url.as_str()),
1206 Duration::from_secs_f64(timeout_secs),
1207 reddit_profile,
1208 )
1209 .await?;
1210
1211 let mut page_guard = page_arc.lock().await;
1212 let page = page_guard.as_mut().ok_or_else(|| {
1213 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1214 })?;
1215 let png_bytes = page.screenshot().await?;
1216 drop(page_guard);
1217
1218 let encoded = base64::engine::general_purpose::STANDARD.encode(&png_bytes);
1219 Ok(json!({ "data": encoded, "mimeType": "image/png", "bytes": png_bytes.len() }))
1220 }
1221
1222 async fn tool_browser_content(&self, args: &Value) -> Result<Value> {
1223 let session_id = Self::require_str(args, "session_id")?;
1224 let timeout_secs = args
1225 .get("timeout_secs")
1226 .and_then(serde_json::Value::as_f64)
1227 .unwrap_or(30.0);
1228
1229 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1230 self.session_runtime(&session_id).await?;
1231 let nav_url = nav_url_opt.ok_or_else(|| {
1232 BrowserError::ConfigError(
1233 "No page loaded — call browser_navigate before browser_content".to_string(),
1234 )
1235 })?;
1236
1237 self.ensure_session_page(
1238 &session_id,
1239 &session_arc,
1240 &attached_browser_arc,
1241 &page_arc,
1242 Some(nav_url.as_str()),
1243 Duration::from_secs_f64(timeout_secs),
1244 reddit_profile,
1245 )
1246 .await?;
1247
1248 let mut page_guard = page_arc.lock().await;
1249 let page = page_guard.as_mut().ok_or_else(|| {
1250 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1251 })?;
1252 let html = page.content().await?;
1253 drop(page_guard);
1254
1255 Ok(json!({ "html": html, "bytes": html.len() }))
1256 }
1257
/// Attaches to an externally running browser and registers it as a session.
///
/// Arguments read from `args`: `mode` (required; `extension_bridge` or
/// `cdp_ws`), `endpoint` (required for `cdp_ws`, must use `ws://` or
/// `wss://`), `profile_hint` (optional, echoed back) and `target_profile`
/// (`reddit`, case-insensitive; anything else becomes `default`).
///
/// `extension_bridge` only reports that it is not implemented. `cdp_ws`
/// connects with a 10 second timeout, spawns a task that drains the handler
/// until it ends or yields an error, and stores a new session keyed by a
/// fresh ULID.
///
/// # Errors
///
/// Returns a `ConfigError` for an unknown mode or a missing/invalid endpoint,
/// `Timeout` when connecting takes longer than 10 seconds and
/// `ConnectionError` when the connection attempt fails.
#[cfg(feature = "mcp-attach")]
async fn tool_browser_attach(&self, args: &Value) -> Result<Value> {
    let mode = Self::require_str(args, "mode")?;
    let optional_string =
        |key: &str| args.get(key).and_then(Value::as_str).map(ToString::to_string);
    let endpoint = optional_string("endpoint");
    let profile_hint = optional_string("profile_hint");

    // Only the "reddit" profile is recognised; everything else is "default".
    let target_profile = match args.get("target_profile").and_then(Value::as_str) {
        Some(name) if name.eq_ignore_ascii_case("reddit") => "reddit",
        _ => "default",
    }
    .to_string();

    if mode == "extension_bridge" {
        return Ok(json!({
            "supported": false,
            "mode": mode,
            "profile_hint": profile_hint,
            "status": "not_implemented",
            "next_step": "Implement extension bridge handshake and profile transfer"
        }));
    }
    if mode != "cdp_ws" {
        return Err(BrowserError::ConfigError(format!(
            "Invalid mode '{mode}'. Use one of: extension_bridge, cdp_ws"
        )));
    }

    let Some(endpoint) = endpoint else {
        return Err(BrowserError::ConfigError(
            "missing endpoint for cdp_ws mode".to_string(),
        ));
    };
    let has_ws_scheme = endpoint.starts_with("ws://") || endpoint.starts_with("wss://");
    if !has_ws_scheme {
        return Err(BrowserError::ConfigError(
            "endpoint must start with ws:// or wss://".to_string(),
        ));
    }

    let connect = Browser::connect(endpoint.clone());
    let connected = match tokio::time::timeout(Duration::from_secs(10), connect).await {
        Ok(connected) => connected,
        Err(_) => {
            return Err(BrowserError::Timeout {
                operation: "Browser.connect".to_string(),
                duration_ms: 10_000,
            });
        }
    };
    let (browser, mut handler) = connected.map_err(|e| BrowserError::ConnectionError {
        url: endpoint.clone(),
        reason: e.to_string(),
    })?;

    // Drain handler events in the background; stop at the first error or when
    // the stream ends.
    let handler_task = tokio::spawn(async move {
        loop {
            match handler.next().await {
                Some(Ok(_)) => {}
                Some(Err(error)) => {
                    tracing::warn!(%error, "attached browser handler error");
                    break;
                }
                None => break,
            }
        }
    });

    let session_id = Ulid::new().to_string();
    let session = McpSession {
        handle: Arc::new(Mutex::new(None)),
        attached_browser: Arc::new(Mutex::new(Some(browser))),
        attached_handler_task: Arc::new(Mutex::new(Some(handler_task))),
        page: Arc::new(Mutex::new(None)),
        stealth_level: StealthLevel::None,
        tls_profile: None,
        webrtc_policy: None,
        cdp_fix_mode: None,
        proxy: None,
        target_profile: target_profile.clone(),
        current_url: None,
        saved_snapshot: None,
        attach_endpoint: Some(endpoint.clone()),
        behavior_plan: None,
    };
    self.sessions
        .lock()
        .await
        .insert(session_id.clone(), session);

    Ok(json!({
        "supported": true,
        "mode": "cdp_ws",
        "session_id": session_id,
        "endpoint": endpoint,
        "profile_hint": profile_hint,
        "requested_metadata": {
            "target_profile": target_profile
        }
    }))
}
1362
1363 async fn tool_browser_auth_session(&self, args: &Value) -> Result<Value> {
1364 let session_id = Self::require_str(args, "session_id")?;
1365 let mode = Self::require_str(args, "mode")?;
1366 let file_path = args
1367 .get("file_path")
1368 .and_then(Value::as_str)
1369 .map(ToString::to_string);
1370 let ttl_secs = args.get("ttl_secs").and_then(Value::as_u64);
1371 let navigate_to_origin = args
1372 .get("navigate_to_origin")
1373 .and_then(Value::as_bool)
1374 .unwrap_or(true);
1375 let interaction_level = args
1376 .get("interaction_level")
1377 .and_then(Value::as_str)
1378 .unwrap_or("none")
1379 .to_string();
1380
1381 let payload = match mode.as_str() {
1382 "capture" => {
1383 let mut save_args = json!({
1384 "session_id": session_id,
1385 "include_snapshot": false
1386 });
1387 if let Some(ttl) = ttl_secs
1388 && let Some(obj) = save_args.as_object_mut()
1389 {
1390 obj.insert("ttl_secs".to_string(), Value::from(ttl));
1391 }
1392 if let Some(path) = file_path.clone()
1393 && let Some(obj) = save_args.as_object_mut()
1394 {
1395 obj.insert("file_path".to_string(), Value::String(path));
1396 }
1397
1398 let save = self.tool_browser_session_save(&save_args).await?;
1399
1400 let humanize = if interaction_level == "none" {
1401 None
1402 } else {
1403 let humanize_args = json!({
1404 "session_id": session_id,
1405 "level": interaction_level
1406 });
1407 Some(self.tool_browser_humanize(&humanize_args).await?)
1408 };
1409
1410 json!({
1411 "mode": "capture",
1412 "session_id": session_id,
1413 "save": save,
1414 "humanize": humanize
1415 })
1416 }
1417 "resume" => {
1418 let mut restore_args = json!({
1419 "session_id": session_id,
1420 "use_saved": file_path.is_none(),
1421 "navigate_to_origin": navigate_to_origin
1422 });
1423 if let Some(path) = file_path.clone()
1424 && let Some(obj) = restore_args.as_object_mut()
1425 {
1426 obj.insert("file_path".to_string(), Value::String(path));
1427 }
1428
1429 let restore = self.tool_browser_session_restore(&restore_args).await?;
1430
1431 let humanize = if interaction_level == "none" {
1432 None
1433 } else {
1434 let humanize_args = json!({
1435 "session_id": session_id,
1436 "level": interaction_level
1437 });
1438 Some(self.tool_browser_humanize(&humanize_args).await?)
1439 };
1440
1441 json!({
1442 "mode": "resume",
1443 "session_id": session_id,
1444 "restore": restore,
1445 "humanize": humanize
1446 })
1447 }
1448 other => {
1449 return Err(BrowserError::ConfigError(format!(
1450 "Invalid mode '{other}'. Use one of: capture, resume"
1451 )));
1452 }
1453 };
1454
1455 Ok(payload)
1456 }
1457
1458 async fn tool_browser_session_save(&self, args: &Value) -> Result<Value> {
1459 let session_id = Self::require_str(args, "session_id")?;
1460 let ttl_secs = args.get("ttl_secs").and_then(Value::as_u64);
1461 let file_path = args
1462 .get("file_path")
1463 .and_then(Value::as_str)
1464 .map(ToString::to_string);
1465 let include_snapshot = args
1466 .get("include_snapshot")
1467 .and_then(Value::as_bool)
1468 .unwrap_or(false);
1469
1470 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1471 self.session_runtime(&session_id).await?;
1472
1473 self.ensure_session_page(
1474 &session_id,
1475 &session_arc,
1476 &attached_browser_arc,
1477 &page_arc,
1478 nav_url_opt.as_deref(),
1479 Duration::from_secs(30),
1480 reddit_profile,
1481 )
1482 .await?;
1483
1484 let mut snapshot = {
1485 let page_guard = page_arc.lock().await;
1486 let page = page_guard.as_ref().ok_or_else(|| {
1487 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1488 })?;
1489 let saved = save_session(page).await?;
1490 drop(page_guard);
1491 saved
1492 };
1493
1494 snapshot.ttl_secs = ttl_secs;
1495 if let Some(path) = &file_path {
1496 snapshot.save_to_file(path)?;
1497 }
1498
1499 let cookie_count = snapshot.cookies.len();
1500 let local_storage_keys = snapshot.local_storage.len();
1501 let origin = snapshot.origin.clone();
1502
1503 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
1504 session.saved_snapshot = Some(snapshot.clone());
1505 }
1506
1507 let mut out = json!({
1508 "session_id": session_id,
1509 "origin": origin,
1510 "cookie_count": cookie_count,
1511 "local_storage_keys": local_storage_keys,
1512 "ttl_secs": ttl_secs,
1513 "saved_to_file": file_path
1514 });
1515
1516 if include_snapshot && let Some(obj) = out.as_object_mut() {
1517 obj.insert(
1518 "snapshot".to_string(),
1519 serde_json::to_value(&snapshot).map_err(|e| {
1520 BrowserError::ConfigError(format!("failed to serialize session snapshot: {e}"))
1521 })?,
1522 );
1523 }
1524
1525 Ok(out)
1526 }
1527
1528 async fn tool_browser_session_restore(&self, args: &Value) -> Result<Value> {
1529 let session_id = Self::require_str(args, "session_id")?;
1530 let file_path = args
1531 .get("file_path")
1532 .and_then(Value::as_str)
1533 .map(ToString::to_string);
1534 let use_saved = args
1535 .get("use_saved")
1536 .and_then(Value::as_bool)
1537 .unwrap_or(true);
1538 let navigate_to_origin = args
1539 .get("navigate_to_origin")
1540 .and_then(Value::as_bool)
1541 .unwrap_or(true);
1542
1543 let snapshot = if let Some(path) = file_path.as_deref() {
1544 SessionSnapshot::load_from_file(path)?
1545 } else if let Some(inline) = args.get("snapshot") {
1546 serde_json::from_value::<SessionSnapshot>(inline.clone()).map_err(|e| {
1547 BrowserError::ConfigError(format!("invalid inline session snapshot: {e}"))
1548 })?
1549 } else if use_saved {
1550 self.sessions
1551 .lock()
1552 .await
1553 .get(&session_id)
1554 .and_then(|s| s.saved_snapshot.clone())
1555 .ok_or_else(|| {
1556 BrowserError::ConfigError(
1557 "No saved session snapshot found for this session".to_string(),
1558 )
1559 })?
1560 } else {
1561 return Err(BrowserError::ConfigError(
1562 "No restore source provided. Set one of: file_path, snapshot, or use_saved=true"
1563 .to_string(),
1564 ));
1565 };
1566
1567 let source = if file_path.is_some() {
1568 "file"
1569 } else if args.get("snapshot").is_some() {
1570 "inline"
1571 } else {
1572 "saved"
1573 };
1574
1575 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1576 self.session_runtime(&session_id).await?;
1577
1578 self.ensure_session_page(
1579 &session_id,
1580 &session_arc,
1581 &attached_browser_arc,
1582 &page_arc,
1583 nav_url_opt.as_deref(),
1584 Duration::from_secs(30),
1585 reddit_profile,
1586 )
1587 .await?;
1588
1589 {
1590 let mut page_guard = page_arc.lock().await;
1591 let page = page_guard.as_mut().ok_or_else(|| {
1592 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1593 })?;
1594
1595 if navigate_to_origin && !snapshot.origin.is_empty() {
1596 Self::navigate_with_profile(
1597 page,
1598 &snapshot.origin,
1599 Duration::from_secs(30),
1600 reddit_profile,
1601 )
1602 .await?;
1603 }
1604
1605 restore_session(page, &snapshot).await?;
1606 drop(page_guard);
1607 }
1608
1609 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
1610 if !snapshot.origin.is_empty() {
1611 session.current_url = Some(snapshot.origin.clone());
1612 }
1613 session.saved_snapshot = Some(snapshot.clone());
1614 }
1615
1616 Ok(json!({
1617 "session_id": session_id,
1618 "source": source,
1619 "origin": snapshot.origin,
1620 "cookie_count": snapshot.cookies.len(),
1621 "local_storage_keys": snapshot.local_storage.len(),
1622 "snapshot_expired": snapshot.is_expired()
1623 }))
1624 }
1625
1626 async fn tool_browser_apply_behavior_json(&self, args: &Value) -> Result<Value> {
1627 let behavior = args.get("behavior").cloned().ok_or_else(|| {
1628 BrowserError::ConfigError("Missing required 'behavior' object".to_string())
1629 })?;
1630
1631 if !behavior.is_object() {
1632 return Err(BrowserError::ConfigError(
1633 "'behavior' must be a JSON object".to_string(),
1634 ));
1635 }
1636
1637 let adapter = PolymorphicBehaviorAdapter::from_json_value(behavior)?;
1638 let mut effective_config = BrowserConfig::default();
1639 let plan = adapter.apply(&mut effective_config);
1640 let adapter_kind = adapter.kind();
1641
1642 let session_id = args
1643 .get("session_id")
1644 .and_then(Value::as_str)
1645 .map(ToString::to_string);
1646
1647 let session_updated = if let Some(sid) = &session_id {
1648 let mut sessions = self.sessions.lock().await;
1649 let session = sessions
1650 .get_mut(sid)
1651 .ok_or_else(|| BrowserError::ConfigError(format!("Unknown session_id: {sid}")))?;
1652
1653 let cdp_fix_mode = serde_json::to_value(effective_config.cdp_fix_mode)
1654 .ok()
1655 .and_then(|value| value.as_str().map(ToString::to_string));
1656
1657 session.behavior_plan = Some(plan.clone());
1658 session.stealth_level = effective_config.stealth_level;
1659 session.cdp_fix_mode = cdp_fix_mode;
1660 session.proxy.clone_from(&effective_config.proxy);
1661
1662 #[cfg(feature = "stealth")]
1663 {
1664 session.webrtc_policy = Some(format!("{:?}", effective_config.webrtc.policy));
1665 }
1666
1667 drop(sessions);
1668 true
1669 } else {
1670 false
1671 };
1672
1673 let effective_view = json!({
1674 "headless": effective_config.headless,
1675 "stealth_level": effective_config.stealth_level,
1676 "proxy": effective_config.proxy,
1677 "window_size": effective_config.window_size,
1678 "cdp_fix_mode": effective_config.cdp_fix_mode,
1679 "args": effective_config.args
1680 });
1681
1682 Ok(json!({
1683 "adapter_kind": adapter_kind,
1684 "plan": plan,
1685 "effective_config": effective_view,
1686 "session_id": session_id,
1687 "session_updated": session_updated
1688 }))
1689 }
1690
1691 async fn tool_browser_humanize(&self, args: &Value) -> Result<Value> {
1692 let session_id = Self::require_str(args, "session_id")?;
1693 let default_level = {
1694 let sessions = self.sessions.lock().await;
1695 sessions
1696 .get(&session_id)
1697 .and_then(|s| s.behavior_plan.as_ref())
1698 .map_or(InteractionLevel::Low, |plan| match plan.interaction_level {
1699 BehaviorInteractionLevel::None => InteractionLevel::None,
1700 BehaviorInteractionLevel::Low => InteractionLevel::Low,
1701 BehaviorInteractionLevel::Medium => InteractionLevel::Medium,
1702 BehaviorInteractionLevel::High => InteractionLevel::High,
1703 })
1704 };
1705 let level = match args.get("level").and_then(Value::as_str) {
1706 Some("none") => InteractionLevel::None,
1707 Some("medium") => InteractionLevel::Medium,
1708 Some("high") => InteractionLevel::High,
1709 Some(_) => InteractionLevel::Low,
1710 None => default_level,
1711 };
1712 let viewport_width = args
1713 .get("viewport_width")
1714 .and_then(Value::as_f64)
1715 .unwrap_or(1366.0);
1716 let viewport_height = args
1717 .get("viewport_height")
1718 .and_then(Value::as_f64)
1719 .unwrap_or(768.0);
1720
1721 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1722 self.session_runtime(&session_id).await?;
1723
1724 self.ensure_session_page(
1725 &session_id,
1726 &session_arc,
1727 &attached_browser_arc,
1728 &page_arc,
1729 nav_url_opt.as_deref(),
1730 Duration::from_secs(30),
1731 reddit_profile,
1732 )
1733 .await?;
1734
1735 {
1736 let page_guard = page_arc.lock().await;
1737 let page = page_guard.as_ref().ok_or_else(|| {
1738 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1739 })?;
1740
1741 let mut simulator = InteractionSimulator::new(level);
1742 simulator
1743 .random_interaction(page.inner(), viewport_width, viewport_height)
1744 .await?;
1745 drop(page_guard);
1746 }
1747
1748 let level_str = match level {
1749 InteractionLevel::None => "none",
1750 InteractionLevel::Low => "low",
1751 InteractionLevel::Medium => "medium",
1752 InteractionLevel::High => "high",
1753 };
1754
1755 Ok(json!({
1756 "session_id": session_id,
1757 "level": level_str,
1758 "viewport_width": viewport_width,
1759 "viewport_height": viewport_height,
1760 "applied": true
1761 }))
1762 }
1763
1764 async fn tool_browser_query(&self, args: &Value) -> Result<Value> {
1765 let session_id = Self::require_str(args, "session_id")?;
1766 let url = Self::require_str(args, "url")?;
1767 let selector = Self::require_str(args, "selector")?;
1768 let limit = usize::try_from(
1769 args.get("limit")
1770 .and_then(serde_json::Value::as_u64)
1771 .unwrap_or(50),
1772 )
1773 .unwrap_or(50);
1774 let timeout_secs = args
1775 .get("timeout_secs")
1776 .and_then(serde_json::Value::as_f64)
1777 .unwrap_or(30.0);
1778
1779 let fields: Option<Vec<(String, Option<String>)>> =
1781 args.get("fields").and_then(|v| v.as_object()).map(|obj| {
1782 obj.iter()
1783 .map(|(k, v)| {
1784 let attr = v
1785 .get("attr")
1786 .and_then(serde_json::Value::as_str)
1787 .map(ToString::to_string);
1788 (k.clone(), attr)
1789 })
1790 .collect()
1791 });
1792
1793 let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
1794 self.session_runtime(&session_id).await?;
1795 self.ensure_session_page(
1796 &session_id,
1797 &session_arc,
1798 &attached_browser_arc,
1799 &page_arc,
1800 None,
1801 Duration::from_secs_f64(timeout_secs),
1802 reddit_profile,
1803 )
1804 .await?;
1805
1806 let mut page_guard = page_arc.lock().await;
1807 let page = page_guard.as_mut().ok_or_else(|| {
1808 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1809 })?;
1810
1811 Self::navigate_with_profile(
1812 page,
1813 &url,
1814 Duration::from_secs_f64(timeout_secs),
1815 reddit_profile,
1816 )
1817 .await?;
1818
1819 let all_nodes = page.query_selector_all(&selector).await?;
1820 let nodes = all_nodes.get(..limit).unwrap_or(&all_nodes);
1821 let mut results: Vec<Value> = Vec::with_capacity(nodes.len());
1822 if let Some(ref field_defs) = fields {
1823 for node in nodes {
1824 let mut obj = serde_json::Map::new();
1825 for (field_name, attr_name) in field_defs {
1826 let val = if let Some(attr) = attr_name {
1827 node.attr(attr)
1828 .await
1829 .map_or(Value::Null, |opt| opt.map_or(Value::Null, Value::String))
1830 } else {
1831 node.text_content().await.map_or(Value::Null, Value::String)
1832 };
1833 obj.insert(field_name.clone(), val);
1834 }
1835 results.push(Value::Object(obj));
1836 }
1837 } else {
1838 for node in nodes {
1839 let text = node.text_content().await.unwrap_or_default();
1840 results.push(Value::String(text));
1841 }
1842 }
1843 drop(page_guard);
1844 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
1845 session.current_url = Some(url.clone());
1846 }
1847
1848 Ok(json!({
1849 "url": url,
1850 "selector": selector,
1851 "count": results.len(),
1852 "results": results
1853 }))
1854 }
1855
1856 async fn tool_browser_extract(&self, args: &Value) -> Result<Value> {
1857 let session_id = Self::require_str(args, "session_id")?;
1858 let url = Self::require_str(args, "url")?;
1859 let root_selector = Self::require_str(args, "root_selector")?;
1860 let timeout_secs = args
1861 .get("timeout_secs")
1862 .and_then(serde_json::Value::as_f64)
1863 .unwrap_or(30.0);
1864
1865 let schema_obj = args
1867 .get("schema")
1868 .and_then(|v| v.as_object())
1869 .ok_or_else(|| {
1870 BrowserError::ConfigError("Missing or non-object 'schema' argument".to_string())
1871 })?;
1872
1873 let schema: Vec<(String, ExtractFieldDef)> = schema_obj
1874 .iter()
1875 .filter_map(|(name, spec)| {
1876 let selector = spec
1877 .get("selector")
1878 .and_then(serde_json::Value::as_str)
1879 .map(ToString::to_string)?;
1880 let attr = spec
1881 .get("attr")
1882 .and_then(serde_json::Value::as_str)
1883 .map(ToString::to_string);
1884 let required = spec
1885 .get("required")
1886 .and_then(serde_json::Value::as_bool)
1887 .unwrap_or(false);
1888 Some((
1889 name.clone(),
1890 ExtractFieldDef {
1891 selector,
1892 attr,
1893 required,
1894 },
1895 ))
1896 })
1897 .collect();
1898
1899 let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
1900 self.session_runtime(&session_id).await?;
1901 self.ensure_session_page(
1902 &session_id,
1903 &session_arc,
1904 &attached_browser_arc,
1905 &page_arc,
1906 None,
1907 Duration::from_secs_f64(timeout_secs),
1908 reddit_profile,
1909 )
1910 .await?;
1911
1912 let mut page_guard = page_arc.lock().await;
1913 let page = page_guard.as_mut().ok_or_else(|| {
1914 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1915 })?;
1916
1917 Self::navigate_with_profile(
1918 page,
1919 &url,
1920 Duration::from_secs_f64(timeout_secs),
1921 reddit_profile,
1922 )
1923 .await?;
1924
1925 let roots = page.query_selector_all(&root_selector).await?;
1926 let mut results: Vec<Value> = Vec::with_capacity(roots.len());
1927 for root in &roots {
1928 if let Some(obj) = Self::extract_record(root, &schema).await {
1929 results.push(Value::Object(obj));
1930 }
1931 }
1932 drop(page_guard);
1933 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
1934 session.current_url = Some(url.clone());
1935 }
1936
1937 Ok(json!({
1938 "url": url,
1939 "root_selector": root_selector,
1940 "count": results.len(),
1941 "results": results
1942 }))
1943 }
1944
/// Navigates to `url` and finds elements similar to the first element that
/// matches `reference_selector`.
///
/// Arguments read from `args`: `session_id`, `url` and `reference_selector`
/// (required), `threshold` (default `SimilarityConfig::DEFAULT_THRESHOLD`),
/// `max_results` (default 10) and `timeout_secs` (seconds, may be fractional,
/// default 30).
///
/// When the reference selector matches nothing, an `Ok` object with
/// `isError: true` and an `error` message is returned. Otherwise the object
/// has `url`, `reference` (tag, classes, attribute names and depth of the
/// reference fingerprint), `count` and `matches` (each with `score`, `text`
/// and `outer_html_snippet`, the first 200 characters of the node's inner
/// HTML). In both cases the requested URL is recorded as the session's
/// current URL, because the navigation has already taken place.
///
/// # Errors
///
/// Returns a `ConfigError` when `timeout_secs` is negative or too large to be
/// represented as a `Duration`, or when the session page is unavailable;
/// propagates argument, session-lookup, page-setup, navigation, selector
/// query, fingerprint and similarity search failures.
#[cfg(feature = "similarity")]
async fn tool_browser_find_similar(&self, args: &Value) -> Result<Value> {
    use crate::similarity::SimilarityConfig;

    let session_id = Self::require_str(args, "session_id")?;
    let url = Self::require_str(args, "url")?;
    let reference_selector = Self::require_str(args, "reference_selector")?;
    #[allow(clippy::cast_possible_truncation)]
    let threshold = args
        .get("threshold")
        .and_then(serde_json::Value::as_f64)
        .map_or(SimilarityConfig::DEFAULT_THRESHOLD, |v| v as f32);
    let max_results = usize::try_from(
        args.get("max_results")
            .and_then(serde_json::Value::as_u64)
            .unwrap_or(10),
    )
    .unwrap_or(10);
    let timeout_secs = args
        .get("timeout_secs")
        .and_then(serde_json::Value::as_f64)
        .unwrap_or(30.0);
    // `Duration::from_secs_f64` panics on negative or out-of-range input, and
    // this value comes straight from the caller, so convert fallibly instead.
    let timeout = Duration::try_from_secs_f64(timeout_secs).map_err(|e| {
        BrowserError::ConfigError(format!("Invalid timeout_secs {timeout_secs}: {e}"))
    })?;

    let config = SimilarityConfig {
        threshold,
        max_results,
    };

    let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
        self.session_runtime(&session_id).await?;
    self.ensure_session_page(
        &session_id,
        &session_arc,
        &attached_browser_arc,
        &page_arc,
        None,
        timeout,
        reddit_profile,
    )
    .await?;

    let mut page_guard = page_arc.lock().await;
    let page = page_guard.as_mut().ok_or_else(|| {
        BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
    })?;

    Self::navigate_with_profile(page, &url, timeout, reddit_profile).await?;

    let refs = page.query_selector_all(&reference_selector).await?;
    let Some(reference) = refs.into_iter().next() else {
        drop(page_guard);
        // The page has already been navigated, so the session must remember
        // the URL even though no reference element was found.
        if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
            session.current_url = Some(url);
        }
        return Ok(json!({
            "isError": true,
            "error": format!("Reference selector matched no elements: {reference_selector}")
        }));
    };

    let ref_fp = reference.fingerprint().await?;
    let matches = page.find_similar(&reference, config).await?;

    let mut match_results: Vec<Value> = Vec::with_capacity(matches.len());
    for m in &matches {
        let text = m.node.text_content().await.unwrap_or_default();
        let snippet = m.node.inner_html().await.unwrap_or_default();
        // Truncate by characters, not bytes, to stay on UTF-8 boundaries.
        let snippet: String = snippet.chars().take(200).collect();
        match_results.push(json!({
            "score": m.score,
            "text": text,
            "outer_html_snippet": snippet
        }));
    }
    drop(page_guard);
    if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
        session.current_url = Some(url.clone());
    }

    Ok(json!({
        "url": url,
        "reference": {
            "tag": ref_fp.tag,
            "classes": ref_fp.classes,
            "attr_names": ref_fp.attr_names,
            "depth": ref_fp.depth
        },
        "count": match_results.len(),
        "matches": match_results
    }))
}
2039
2040 async fn tool_browser_warmup(&self, args: &Value) -> Result<Value> {
2041 use crate::page::{WarmupOptions, WarmupWait};
2042
2043 let session_id = Self::require_str(args, "session_id")?;
2044 let url = Self::require_str(args, "url")?;
2045 let wait = match args
2046 .get("wait")
2047 .and_then(|v| v.as_str())
2048 .unwrap_or("dom_content_loaded")
2049 {
2050 "network_idle" => WarmupWait::NetworkIdle,
2051 _ => WarmupWait::DomContentLoaded,
2052 };
2053 let timeout_ms = args
2054 .get("timeout_ms")
2055 .and_then(serde_json::Value::as_u64)
2056 .unwrap_or(30_000);
2057 let stabilize_ms = args
2058 .get("stabilize_ms")
2059 .and_then(serde_json::Value::as_u64)
2060 .unwrap_or(0);
2061
2062 let (session_arc, attached_browser_arc, page_arc, _, _) =
2063 self.session_runtime(&session_id).await?;
2064 self.ensure_session_page(
2065 &session_id,
2066 &session_arc,
2067 &attached_browser_arc,
2068 &page_arc,
2069 None,
2070 Duration::from_millis(timeout_ms),
2071 false,
2072 )
2073 .await?;
2074
2075 let mut page_guard = page_arc.lock().await;
2076 let page = page_guard.as_mut().ok_or_else(|| {
2077 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
2078 })?;
2079
2080 let report = page
2081 .warmup(WarmupOptions {
2082 url,
2083 wait,
2084 timeout_ms,
2085 stabilize_ms,
2086 })
2087 .await?;
2088 drop(page_guard);
2089
2090 Ok(json!({
2091 "session_id": session_id,
2092 "url": report.url,
2093 "elapsed_ms": report.elapsed_ms,
2094 "status_code": report.status_code,
2095 "title": report.title,
2096 "stabilized": report.stabilized
2097 }))
2098 }
2099
2100 async fn tool_browser_refresh(&self, args: &Value) -> Result<Value> {
2101 use crate::page::{RefreshOptions, WarmupWait};
2102
2103 let session_id = Self::require_str(args, "session_id")?;
2104 let wait = match args
2105 .get("wait")
2106 .and_then(|v| v.as_str())
2107 .unwrap_or("dom_content_loaded")
2108 {
2109 "network_idle" => WarmupWait::NetworkIdle,
2110 _ => WarmupWait::DomContentLoaded,
2111 };
2112 let timeout_ms = args
2113 .get("timeout_ms")
2114 .and_then(serde_json::Value::as_u64)
2115 .unwrap_or(30_000);
2116 let reset_connection = args
2117 .get("reset_connection")
2118 .and_then(serde_json::Value::as_bool)
2119 .unwrap_or(false);
2120
2121 let (session_arc, attached_browser_arc, page_arc, _, _) =
2122 self.session_runtime(&session_id).await?;
2123 self.ensure_session_page(
2124 &session_id,
2125 &session_arc,
2126 &attached_browser_arc,
2127 &page_arc,
2128 None,
2129 Duration::from_millis(timeout_ms),
2130 false,
2131 )
2132 .await?;
2133
2134 let mut page_guard = page_arc.lock().await;
2135 let page = page_guard.as_mut().ok_or_else(|| {
2136 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
2137 })?;
2138
2139 let report = page
2140 .refresh(RefreshOptions {
2141 wait,
2142 timeout_ms,
2143 reset_connection,
2144 })
2145 .await?;
2146 drop(page_guard);
2147
2148 Ok(json!({
2149 "session_id": session_id,
2150 "url": report.url,
2151 "elapsed_ms": report.elapsed_ms,
2152 "status_code": report.status_code
2153 }))
2154 }
2155
2156 async fn tool_browser_release(&self, args: &Value) -> Result<Value> {
2157 let session_id = Self::require_str(args, "session_id")?;
2158
2159 let (session_arc, attached_browser_arc, attached_handler_task_arc, page_arc) = {
2161 let mut sessions = self.sessions.lock().await;
2162 let removed = sessions.remove(&session_id).ok_or_else(|| {
2163 BrowserError::ConfigError(format!("Unknown session: {session_id}"))
2164 })?;
2165 drop(sessions);
2166 (
2167 removed.handle,
2168 removed.attached_browser,
2169 removed.attached_handler_task,
2170 removed.page,
2171 )
2172 };
2173
2174 let handle = session_arc.lock().await.take();
2176 if let Some(h) = handle {
2177 h.release().await;
2178 }
2179
2180 let attached_browser = attached_browser_arc.lock().await.take();
2181 if let Some(mut browser) = attached_browser {
2182 let close_timeout = Duration::from_secs(5);
2183 match tokio::time::timeout(close_timeout, browser.close()).await {
2184 Ok(Ok(_)) => {}
2185 Ok(Err(error)) => {
2186 tracing::warn!(%session_id, %error, "attached browser close failed during release");
2187 }
2188 Err(_) => {
2189 tracing::warn!(%session_id, "attached browser close timed out during release");
2190 }
2191 }
2192 }
2193
2194 let attached_handler_task = attached_handler_task_arc.lock().await.take();
2195 if let Some(task) = attached_handler_task {
2196 task.abort();
2197 }
2198
2199 let page = page_arc.lock().await.take();
2200 if let Some(page) = page {
2201 page.close().await.ok();
2202 }
2203
2204 info!(%session_id, "MCP session released");
2205 Ok(json!({ "released": true, "session_id": session_id }))
2206 }
2207
/// Handles the stealth-validation tool: runs the validation suite against a
/// set of targets and returns the serialized results.
///
/// Arguments read from `args`:
/// - `tier1_only` (bool, optional, default `false`): when `true`, the
///   `targets` argument is ignored and `ValidationTarget::tier1()` is used.
/// - `targets` (array of strings, optional): recognised names are mapped to
///   targets; non-string or unrecognised entries are dropped. When absent or
///   not an array, `ValidationTarget::tier1()` is used.
/// - `timeout_secs` (unsigned integer, optional, default `30`): budget applied
///   to each target individually; a target exceeding it is recorded as failed.
///
/// # Errors
/// Fails only when the collected results cannot be serialized to JSON.
#[cfg(feature = "stealth")]
async fn tool_browser_validate_stealth(&self, args: &Value) -> Result<Value> {
    use crate::validation::{ValidationResult, ValidationSuite, ValidationTarget};

    let timeout_secs = args
        .get("timeout_secs")
        .and_then(Value::as_u64)
        .unwrap_or(30);
    let tier1_only = args
        .get("tier1_only")
        .and_then(Value::as_bool)
        .unwrap_or(false);

    // `None` here means "fall back to the tier-1 set".
    let requested = if tier1_only {
        None
    } else {
        args.get("targets").and_then(Value::as_array)
    };
    let targets = match requested {
        None => ValidationTarget::tier1().to_vec(),
        Some(names) => names
            .iter()
            .filter_map(Value::as_str)
            .filter_map(|name| match name {
                "creepjs" => Some(ValidationTarget::CreepJs),
                "browserscan" => Some(ValidationTarget::BrowserScan),
                "fingerprint_js" => Some(ValidationTarget::FingerprintJs),
                "kasada" => Some(ValidationTarget::Kasada),
                "cloudflare" => Some(ValidationTarget::Cloudflare),
                "akamai" => Some(ValidationTarget::Akamai),
                "data_dome" => Some(ValidationTarget::DataDome),
                "perimeter_x" => Some(ValidationTarget::PerimeterX),
                _ => None,
            })
            .collect::<Vec<_>>(),
    };

    let per_target_budget = Duration::from_secs(timeout_secs);
    let mut results = Vec::with_capacity(targets.len());
    for target in targets {
        let outcome =
            tokio::time::timeout(per_target_budget, ValidationSuite::run_one(&self.pool, target))
                .await;
        // A timeout becomes a failed result instead of aborting the whole run.
        let entry = outcome.unwrap_or_else(|_| {
            ValidationResult::failed(
                target,
                &format!("validation timed out after {timeout_secs}s"),
            )
        });
        results.push(entry);
    }

    match serde_json::to_value(&results) {
        Ok(serialized) => Ok(serialized),
        Err(e) => Err(BrowserError::ConfigError(format!(
            "failed to serialize results: {e}"
        ))),
    }
}
2267
2268 fn tool_pool_stats(&self) -> Value {
2269 let stats = self.pool.stats();
2270 json!({
2271 "active": stats.active,
2272 "max": stats.max,
2273 "available": stats.available
2274 })
2275 }
2276
2277 async fn handle_resources_list(&self, id: Value) -> JsonRpcResponse {
2280 let resources: Vec<Value> = self
2281 .sessions
2282 .lock()
2283 .await
2284 .keys()
2285 .map(|sid| {
2286 json!({
2287 "uri": format!("browser://session/{sid}"),
2288 "name": format!("Browser session {sid}"),
2289 "mimeType": "application/json"
2290 })
2291 })
2292 .collect();
2293
2294 JsonRpcResponse::ok(id, json!({ "resources": resources }))
2295 }
2296
2297 async fn handle_resources_read(&self, id: Value, params: Value) -> JsonRpcResponse {
2300 let uri = match params.get("uri").and_then(|v| v.as_str()) {
2301 Some(u) => u.to_string(),
2302 None => return JsonRpcResponse::err(id, -32602, "Missing 'uri'"),
2303 };
2304
2305 let session_id = uri
2307 .strip_prefix("browser://session/")
2308 .unwrap_or("")
2309 .to_string();
2310
2311 let session_config: Option<Value> = {
2313 let sessions = self.sessions.lock().await;
2314 sessions.get(&session_id).map(|s| {
2315 json!({
2316 "stealth_level": format!("{:?}", s.stealth_level).to_lowercase(),
2317 "tls_profile": s.tls_profile,
2318 "webrtc_policy": s.webrtc_policy,
2319 "cdp_fix_mode": s.cdp_fix_mode,
2320 "proxy": s.proxy,
2321 "target_profile": s.target_profile,
2322 "current_url": s.current_url,
2323 "has_saved_snapshot": s.saved_snapshot.is_some(),
2324 "attach_endpoint": s.attach_endpoint,
2325 "has_behavior_plan": s.behavior_plan.is_some(),
2326 "behavior_plan": s.behavior_plan.as_ref()
2327 })
2328 })
2329 };
2330
2331 if let Some(config) = session_config {
2332 let pool_stats = self.pool.stats();
2333 JsonRpcResponse::ok(
2334 id,
2335 json!({
2336 "contents": [{
2337 "uri": uri,
2338 "mimeType": "application/json",
2339 "text": serde_json::to_string_pretty(&json!({
2340 "session_id": session_id,
2341 "config": config,
2342 "pool_active": pool_stats.active,
2343 "pool_max": pool_stats.max
2344 })).unwrap_or_default()
2345 }]
2346 }),
2347 )
2348 } else {
2349 JsonRpcResponse::err(id, -32002, format!("Resource not found: {uri}"))
2350 }
2351 }
2352
2353 async fn session_runtime(
2356 &self,
2357 session_id: &str,
2358 ) -> Result<(
2359 Arc<Mutex<Option<BrowserHandle>>>,
2360 Arc<Mutex<Option<Browser>>>,
2361 Arc<Mutex<Option<crate::page::PageHandle>>>,
2362 Option<String>,
2363 bool,
2364 )> {
2365 self.sessions
2366 .lock()
2367 .await
2368 .get(session_id)
2369 .map(|s| {
2370 (
2371 s.handle.clone(),
2372 s.attached_browser.clone(),
2373 s.page.clone(),
2374 s.current_url.clone(),
2375 s.target_profile == "reddit",
2376 )
2377 })
2378 .ok_or_else(|| BrowserError::ConfigError(format!("Unknown session: {session_id}")))
2379 }
2380
2381 #[expect(
2382 clippy::too_many_arguments,
2383 reason = "session runtime handles and bootstrap options are passed explicitly for clarity"
2384 )]
2385 async fn ensure_session_page(
2386 &self,
2387 session_id: &str,
2388 handle_arc: &Arc<Mutex<Option<BrowserHandle>>>,
2389 attached_browser_arc: &Arc<Mutex<Option<Browser>>>,
2390 page_arc: &Arc<Mutex<Option<crate::page::PageHandle>>>,
2391 current_url: Option<&str>,
2392 timeout: Duration,
2393 reddit_profile: bool,
2394 ) -> Result<()> {
2395 let mut page_guard = page_arc.lock().await;
2396 let created = if page_guard.is_none() {
2397 let new_page =
2398 Self::create_session_page(session_id, handle_arc, attached_browser_arc).await?;
2399
2400 *page_guard = Some(new_page);
2401 true
2402 } else {
2403 false
2404 };
2405
2406 if created
2407 && let Some(url) = current_url
2408 && let Some(page) = page_guard.as_mut()
2409 {
2410 Self::navigate_with_profile(page, url, timeout, reddit_profile).await?;
2411 }
2412
2413 drop(page_guard);
2414
2415 Ok(())
2416 }
2417
2418 async fn create_session_page(
2419 session_id: &str,
2420 handle_arc: &Arc<Mutex<Option<BrowserHandle>>>,
2421 attached_browser_arc: &Arc<Mutex<Option<Browser>>>,
2422 ) -> Result<crate::page::PageHandle> {
2423 let handle_guard = handle_arc.lock().await;
2424 if let Some(handle) = handle_guard.as_ref() {
2425 let browser = handle.browser().ok_or_else(|| {
2427 BrowserError::ConfigError(format!("Browser handle invalid: {session_id}"))
2428 })?;
2429 let page = browser.new_page().await?;
2430 drop(handle_guard);
2431 return Ok(page);
2432 }
2433 drop(handle_guard);
2434
2435 let browser_guard = attached_browser_arc.lock().await;
2436 let browser = browser_guard.as_ref().ok_or_else(|| {
2437 BrowserError::ConfigError(format!("Session already released: {session_id}"))
2438 })?;
2439 let raw_page =
2440 browser
2441 .new_page("about:blank")
2442 .await
2443 .map_err(|e| BrowserError::CdpError {
2444 operation: "Browser.newPage".to_string(),
2445 message: e.to_string(),
2446 })?;
2447 drop(browser_guard);
2448
2449 Ok(crate::page::PageHandle::new(
2450 raw_page,
2451 Duration::from_secs(30),
2452 ))
2453 }
2454
2455 async fn navigate_with_profile(
2456 page: &mut crate::page::PageHandle,
2457 url: &str,
2458 timeout: Duration,
2459 reddit_profile: bool,
2460 ) -> Result<(bool, bool)> {
2461 let wait_until = if reddit_profile {
2462 WaitUntil::DomContentLoaded
2463 } else {
2464 WaitUntil::Selector("body".to_string())
2465 };
2466
2467 page.navigate(url, wait_until, timeout).await?;
2468
2469 if reddit_profile || url.contains("reddit.com") {
2470 return Self::wait_for_reddit_challenge(page, timeout).await;
2471 }
2472
2473 Ok((false, true))
2474 }
2475
2476 async fn wait_for_reddit_challenge(
2477 page: &crate::page::PageHandle,
2478 timeout: Duration,
2479 ) -> Result<(bool, bool)> {
2480 let max_wait = timeout.min(Duration::from_secs(15));
2481 let mut elapsed = Duration::ZERO;
2482 let interval = Duration::from_millis(500);
2483 let mut challenge_seen = false;
2484
2485 while elapsed <= max_wait {
2486 let challenge_state = page
2487 .eval::<Value>(
2488 r#"(() => {
2489 const title = (document.title || "").toLowerCase();
2490 const href = (location.href || "").toLowerCase();
2491 const body = (document.body?.innerText || "").toLowerCase();
2492 const challenge =
2493 title.includes("verification") ||
2494 title.includes("just a moment") ||
2495 href.includes("/js_challenge") ||
2496 body.includes("please wait for verification") ||
2497 body.includes("verify you are human");
2498 return {
2499 challenge,
2500 ready: document.readyState === "complete"
2501 };
2502 })()"#,
2503 )
2504 .await
2505 .unwrap_or_else(|_| json!({"challenge": false, "ready": true}));
2506
2507 let is_challenge = challenge_state
2508 .get("challenge")
2509 .and_then(Value::as_bool)
2510 .unwrap_or(false);
2511 let ready = challenge_state
2512 .get("ready")
2513 .and_then(Value::as_bool)
2514 .unwrap_or(true);
2515
2516 challenge_seen |= is_challenge;
2517 if !is_challenge && ready {
2518 return Ok((challenge_seen, true));
2519 }
2520
2521 sleep(interval).await;
2522 elapsed += interval;
2523 }
2524
2525 Ok((challenge_seen, false))
2526 }
2527
/// Returns a clone of the session's browser-handle slot together with its
/// stealth level rendered as a lowercase string (lowercased `Debug` output).
///
/// # Errors
/// Returns `BrowserError::ConfigError` when `session_id` is not registered.
#[cfg(feature = "stealth")]
async fn session_handle_and_stealth(
    &self,
    session_id: &str,
) -> Result<(Arc<Mutex<Option<BrowserHandle>>>, String)> {
    let registry = self.sessions.lock().await;
    let Some(session) = registry.get(session_id) else {
        return Err(BrowserError::ConfigError(format!(
            "Unknown session: {session_id}"
        )));
    };

    let stealth = format!("{:?}", session.stealth_level).to_lowercase();
    Ok((Arc::clone(&session.handle), stealth))
}
2545
2546 async fn tool_browser_extract_with_fallback(&self, args: &Value) -> Result<Value> {
2550 let session_id = Self::require_str(args, "session_id")?;
2551 let url = Self::require_str(args, "url")?;
2552 let timeout_secs = args
2553 .get("timeout_secs")
2554 .and_then(serde_json::Value::as_f64)
2555 .unwrap_or(30.0);
2556 let selectors = Self::parse_root_selectors(args)?;
2557 let schema = Self::parse_extract_schema(args)?;
2558
2559 let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
2560 self.session_runtime(&session_id).await?;
2561 self.ensure_session_page(
2562 &session_id,
2563 &session_arc,
2564 &attached_browser_arc,
2565 &page_arc,
2566 None,
2567 Duration::from_secs_f64(timeout_secs),
2568 reddit_profile,
2569 )
2570 .await?;
2571
2572 let mut page_guard = page_arc.lock().await;
2573 let page = page_guard.as_mut().ok_or_else(|| {
2574 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
2575 })?;
2576
2577 Self::navigate_with_profile(
2578 page,
2579 &url,
2580 Duration::from_secs_f64(timeout_secs),
2581 reddit_profile,
2582 )
2583 .await?;
2584
2585 let mut matched_selector = String::new();
2586 let mut results: Vec<Value> = vec![];
2587
2588 for selector in &selectors {
2589 let roots = page.query_selector_all(selector).await?;
2590 if roots.is_empty() {
2591 continue;
2592 }
2593
2594 let mut selector_results: Vec<Value> = Vec::with_capacity(roots.len());
2595 for root in &roots {
2596 if let Some(obj) = Self::extract_record(root, &schema).await {
2597 selector_results.push(Value::Object(obj));
2598 }
2599 }
2600
2601 if selector_results.is_empty() {
2602 continue;
2603 }
2604
2605 matched_selector = selector.clone();
2606 results = selector_results;
2607 break;
2608 }
2609 drop(page_guard);
2610 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
2611 session.current_url = Some(url.clone());
2612 }
2613
2614 Ok(json!({
2615 "url": url,
2616 "matched_selector": matched_selector,
2617 "tried_selectors": selectors,
2618 "count": results.len(),
2619 "results": results
2620 }))
2621 }
2622
2623 async fn tool_browser_extract_resilient(&self, args: &Value) -> Result<Value> {
2628 let session_id = Self::require_str(args, "session_id")?;
2629 let url = Self::require_str(args, "url")?;
2630 let root_selector = Self::require_str(args, "root_selector")?;
2631 let timeout_secs = args
2632 .get("timeout_secs")
2633 .and_then(serde_json::Value::as_f64)
2634 .unwrap_or(30.0);
2635 let schema = Self::parse_extract_schema(args)?;
2636
2637 let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
2638 self.session_runtime(&session_id).await?;
2639 self.ensure_session_page(
2640 &session_id,
2641 &session_arc,
2642 &attached_browser_arc,
2643 &page_arc,
2644 None,
2645 Duration::from_secs_f64(timeout_secs),
2646 reddit_profile,
2647 )
2648 .await?;
2649
2650 let mut page_guard = page_arc.lock().await;
2651 let page = page_guard.as_mut().ok_or_else(|| {
2652 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
2653 })?;
2654
2655 Self::navigate_with_profile(
2656 page,
2657 &url,
2658 Duration::from_secs_f64(timeout_secs),
2659 reddit_profile,
2660 )
2661 .await?;
2662
2663 let roots = page.query_selector_all(&root_selector).await?;
2664 let mut results: Vec<Value> = Vec::with_capacity(roots.len());
2667 let mut skipped: usize = 0;
2668 for root in &roots {
2669 match Self::extract_record(root, &schema).await {
2670 Some(obj) => results.push(Value::Object(obj)),
2671 None => skipped += 1,
2672 }
2673 }
2674 drop(page_guard);
2675 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
2676 session.current_url = Some(url.clone());
2677 }
2678
2679 Ok(json!({
2680 "url": url,
2681 "root_selector": root_selector,
2682 "count": results.len(),
2683 "skipped": skipped,
2684 "results": results
2685 }))
2686 }
2687
2688 async fn extract_record(
2689 root: &crate::page::NodeHandle,
2690 schema: &[(String, ExtractFieldDef)],
2691 ) -> Option<serde_json::Map<String, Value>> {
2692 let mut obj = serde_json::Map::new();
2693 for (field_name, def) in schema {
2694 let Ok(children) = root.children_matching(&def.selector).await else {
2695 if def.required {
2696 return None;
2697 }
2698 obj.insert(field_name.clone(), Value::Null);
2699 continue;
2700 };
2701 let val = match children.into_iter().next() {
2702 None => {
2703 if def.required {
2704 return None;
2705 }
2706 Value::Null
2707 }
2708 Some(node) => {
2709 if let Some(attr) = &def.attr {
2710 node.attr(attr)
2711 .await
2712 .map_or(Value::Null, |opt| opt.map_or(Value::Null, Value::String))
2713 } else {
2714 node.text_content().await.map_or(Value::Null, Value::String)
2715 }
2716 }
2717 };
2718 obj.insert(field_name.clone(), val);
2719 }
2720 Some(obj)
2721 }
2722
2723 fn require_str(args: &Value, key: &str) -> Result<String> {
2724 args.get(key)
2725 .and_then(|v| v.as_str())
2726 .map(ToString::to_string)
2727 .ok_or_else(|| BrowserError::ConfigError(format!("Missing required argument: {key}")))
2728 }
2729
2730 fn parse_acquisition_mode(mode: &str) -> Result<AcquisitionMode> {
2731 match mode {
2732 "fast" => Ok(AcquisitionMode::Fast),
2733 "resilient" => Ok(AcquisitionMode::Resilient),
2734 "hostile" => Ok(AcquisitionMode::Hostile),
2735 "investigate" => Ok(AcquisitionMode::Investigate),
2736 other => Err(BrowserError::ConfigError(format!(
2737 "Invalid mode '{other}'. Use one of: fast, resilient, hostile, investigate"
2738 ))),
2739 }
2740 }
2741
2742 fn parse_acquisition_request(args: &Value) -> Result<AcquisitionRequest> {
2743 const MAX_ACQUISITION_TIMEOUT_SECS: f64 = 86_400.0;
2744
2745 let url = Self::require_str(args, "url")?;
2746 let mode_raw = Self::require_str(args, "mode")?;
2747 let mode = Self::parse_acquisition_mode(&mode_raw)?;
2748
2749 let wait_for_selector = args
2750 .get("wait_for_selector")
2751 .or_else(|| args.get("selector_wait"))
2752 .and_then(Value::as_str)
2753 .map(ToString::to_string);
2754
2755 let extraction_js = args
2756 .get("extraction_js")
2757 .and_then(Value::as_str)
2758 .map(ToString::to_string);
2759
2760 let browserbase_enabled = args
2761 .get("browserbase_enabled")
2762 .or_else(|| args.get("use_browserbase"))
2763 .and_then(Value::as_bool)
2764 .unwrap_or(false);
2765
2766 let total_timeout = match args.get("total_timeout_secs").and_then(Value::as_f64) {
2767 Some(value)
2768 if value.is_finite() && value > 0.0 && value <= MAX_ACQUISITION_TIMEOUT_SECS =>
2769 {
2770 Duration::from_secs_f64(value)
2771 }
2772 Some(_) => {
2773 return Err(BrowserError::ConfigError(format!(
2774 "total_timeout_secs must be a positive finite number <= {MAX_ACQUISITION_TIMEOUT_SECS}"
2775 )));
2776 }
2777 None => AcquisitionRequest::default().total_timeout,
2778 };
2779
2780 Ok(AcquisitionRequest {
2781 url,
2782 mode,
2783 wait_for_selector,
2784 extraction_js,
2785 total_timeout,
2786 browserbase_enabled,
2787 ..AcquisitionRequest::default()
2788 })
2789 }
2790
2791 fn acquisition_result_to_tool_output(result: &AcquisitionResult) -> Value {
2792 let strategy_used = serde_json::to_value(result.strategy_used).unwrap_or(Value::Null);
2793 let attempted = serde_json::to_value(&result.attempted).unwrap_or(Value::Array(Vec::new()));
2794 let failures = serde_json::to_value(&result.failures).unwrap_or(Value::Array(Vec::new()));
2795 let freshness = serde_json::to_value(&result.freshness).unwrap_or(Value::Null);
2796
2797 json!({
2798 "success": result.success,
2799 "strategy_used": strategy_used,
2800 "final_url": result.final_url,
2801 "status_code": result.status_code,
2802 "extracted": result.extracted,
2803 "html_excerpt": result.html_excerpt,
2804 "freshness": freshness,
2805 "diagnostics": {
2806 "attempted": attempted,
2807 "timed_out": result.timed_out,
2808 "failure_count": result.failures.len(),
2809 "failures": failures
2810 }
2811 })
2812 }
2813
2814 fn parse_root_selectors(args: &Value) -> Result<Vec<String>> {
2815 let selectors: Vec<String> = args
2816 .get("root_selectors")
2817 .and_then(Value::as_array)
2818 .ok_or_else(|| {
2819 BrowserError::ConfigError(
2820 "Missing or non-array 'root_selectors' argument".to_string(),
2821 )
2822 })?
2823 .iter()
2824 .filter_map(|v| v.as_str().map(str::to_string))
2825 .collect();
2826
2827 if selectors.is_empty() {
2828 return Err(BrowserError::ConfigError(
2829 "root_selectors must contain at least one entry".to_string(),
2830 ));
2831 }
2832 Ok(selectors)
2833 }
2834
2835 fn parse_extract_schema(args: &Value) -> Result<Vec<(String, ExtractFieldDef)>> {
2836 let schema_obj = args
2837 .get("schema")
2838 .and_then(Value::as_object)
2839 .ok_or_else(|| {
2840 BrowserError::ConfigError("Missing or non-object 'schema' argument".to_string())
2841 })?;
2842
2843 Ok(schema_obj
2844 .iter()
2845 .filter_map(|(name, spec)| {
2846 let selector = spec
2847 .get("selector")
2848 .and_then(Value::as_str)
2849 .map(ToString::to_string)?;
2850 let attr = spec
2851 .get("attr")
2852 .and_then(Value::as_str)
2853 .map(ToString::to_string);
2854 let required = spec
2855 .get("required")
2856 .and_then(Value::as_bool)
2857 .unwrap_or(false);
2858 Some((
2859 name.clone(),
2860 ExtractFieldDef {
2861 selector,
2862 attr,
2863 required,
2864 },
2865 ))
2866 })
2867 .collect())
2868 }
2869}
2870
/// Interprets a raw enable-flag value.
///
/// Returns `true` when `value`, ignoring surrounding whitespace and ASCII
/// case, is one of `true`, `1` or `yes`. Every other input, including the
/// empty string, yields `false`.
fn mcp_enabled_from(value: &str) -> bool {
    // Values copied from shell snippets or env files often carry stray
    // whitespace or a trailing newline; treat those like the bare value.
    let flag = value.trim();
    ["true", "1", "yes"]
        .iter()
        .any(|accepted| flag.eq_ignore_ascii_case(accepted))
}
2876
2877#[must_use]
2882pub fn is_mcp_enabled() -> bool {
2883 mcp_enabled_from(&std::env::var("STYGIAN_MCP_ENABLED").unwrap_or_default())
2884}
2885
2886#[cfg(test)]
2887mod tests {
2888 use super::*;
2889
/// The tool catalogue must advertise `browser_query`.
#[test]
fn tool_defs_include_browser_query() {
    let advertised = TOOL_DEFINITIONS
        .iter()
        .filter_map(|tool| tool.get("name"))
        .filter_map(Value::as_str)
        .any(|name| name == "browser_query");
    assert!(advertised, "TOOL_DEFINITIONS must contain browser_query");
}
2899
/// The tool catalogue must advertise `browser_extract`.
#[test]
fn tool_defs_include_browser_extract() {
    let entry = TOOL_DEFINITIONS
        .iter()
        .find(|tool| tool.get("name").and_then(Value::as_str) == Some("browser_extract"));
    assert!(
        entry.is_some(),
        "TOOL_DEFINITIONS must contain browser_extract"
    );
}
2909
/// The tool catalogue must advertise `browser_acquire_and_extract`.
#[test]
fn tool_defs_include_browser_acquire_and_extract() {
    let names: Vec<&str> = TOOL_DEFINITIONS
        .iter()
        .filter_map(|tool| tool.get("name").and_then(Value::as_str))
        .collect();
    assert!(
        names.contains(&"browser_acquire_and_extract"),
        "TOOL_DEFINITIONS must contain browser_acquire_and_extract"
    );
}
2920
/// The tool catalogue must advertise `browser_extract_with_fallback`.
#[test]
fn tool_defs_include_browser_extract_with_fallback() {
    let index = TOOL_DEFINITIONS.iter().position(|tool| {
        tool.get("name").and_then(Value::as_str) == Some("browser_extract_with_fallback")
    });
    assert!(
        index.is_some(),
        "TOOL_DEFINITIONS must contain browser_extract_with_fallback"
    );
}
2931
/// The tool catalogue must advertise `browser_extract_resilient`.
#[test]
fn tool_defs_include_browser_extract_resilient() {
    let mut found = false;
    for tool in TOOL_DEFINITIONS.iter() {
        if tool.get("name").and_then(Value::as_str) == Some("browser_extract_resilient") {
            found = true;
            break;
        }
    }
    assert!(
        found,
        "TOOL_DEFINITIONS must contain browser_extract_resilient"
    );
}
2942
/// `browser_extract_with_fallback` must list `root_selectors` among its
/// required input properties.
#[test]
fn browser_extract_with_fallback_requires_root_selectors()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let mut candidates = TOOL_DEFINITIONS.iter().filter(|tool| {
        tool.get("name").and_then(Value::as_str) == Some("browser_extract_with_fallback")
    });
    let def = candidates
        .next()
        .ok_or("browser_extract_with_fallback must be in TOOL_DEFINITIONS")?;

    let Some(required) = def
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .and_then(Value::as_array)
    else {
        return Err("browser_extract_with_fallback inputSchema missing 'required' array".into());
    };

    let lists_root_selectors = required
        .iter()
        .filter_map(Value::as_str)
        .any(|name| name == "root_selectors");
    assert!(
        lists_root_selectors,
        "root_selectors must be required in browser_extract_with_fallback"
    );
    Ok(())
}
2964
/// `browser_query` must require `session_id`, `url` and `selector`.
#[test]
fn browser_query_required_args() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let def = TOOL_DEFINITIONS
        .iter()
        .find(|tool| tool.get("name").and_then(Value::as_str) == Some("browser_query"))
        .ok_or("browser_query must be in TOOL_DEFINITIONS")?;
    let required = def
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .ok_or("browser_query inputSchema missing 'required'")?;

    // A non-array `required` value fails every check below.
    for key in ["session_id", "url", "selector"] {
        assert!(
            required
                .as_array()
                .is_some_and(|names| names.iter().any(|v| v == key))
        );
    }
    Ok(())
}
2994
/// `browser_extract` must require `root_selector` and `schema`.
#[test]
fn browser_extract_required_args() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let def = TOOL_DEFINITIONS
        .iter()
        .find(|tool| tool.get("name").and_then(Value::as_str) == Some("browser_extract"))
        .ok_or("browser_extract must be in TOOL_DEFINITIONS")?;
    let required = def
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .ok_or("browser_extract inputSchema missing 'required'")?;

    // A non-array `required` value fails every check below.
    let names = required.as_array();
    for key in ["root_selector", "schema"] {
        assert!(names.is_some_and(|list| list.iter().any(|v| v == key)));
    }
    Ok(())
}
3018
/// `browser_acquire_and_extract` must require `url` and `mode`, and its
/// `mode` property must enumerate all four acquisition modes.
#[test]
fn browser_acquire_and_extract_required_args()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let def = TOOL_DEFINITIONS
        .iter()
        .find(|tool| {
            tool.get("name").and_then(Value::as_str) == Some("browser_acquire_and_extract")
        })
        .ok_or("browser_acquire_and_extract must be in TOOL_DEFINITIONS")?;
    let input_schema = def.get("inputSchema");

    let required = input_schema
        .and_then(|schema| schema.get("required"))
        .and_then(Value::as_array)
        .ok_or("browser_acquire_and_extract inputSchema missing 'required' array")?;
    for key in ["url", "mode"] {
        assert!(required.iter().any(|v| v == key));
    }

    let mode_values = input_schema
        .and_then(|schema| schema.get("properties"))
        .and_then(|props| props.get("mode"))
        .and_then(|mode| mode.get("enum"))
        .and_then(Value::as_array)
        .ok_or("browser_acquire_and_extract mode enum missing")?;
    for mode in ["fast", "resilient", "hostile", "investigate"] {
        assert!(mode_values.iter().any(|v| v == mode));
    }
    Ok(())
}
3049
/// Every supported mode name parses to its matching `AcquisitionMode`.
#[test]
fn acquisition_mode_parsing_accepts_all_supported_values()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let table = [
        ("fast", AcquisitionMode::Fast),
        ("resilient", AcquisitionMode::Resilient),
        ("hostile", AcquisitionMode::Hostile),
        ("investigate", AcquisitionMode::Investigate),
    ];
    for (raw, expected) in table {
        assert_eq!(McpBrowserServer::parse_acquisition_mode(raw)?, expected);
    }
    Ok(())
}
3071
/// An unrecognised mode name is rejected.
#[test]
fn acquisition_mode_parsing_rejects_unknown() {
    let outcome = McpBrowserServer::parse_acquisition_mode("invalid");
    assert!(outcome.is_err(), "invalid mode should return an error");
}
3077
/// A request without `url` does not validate.
#[test]
fn acquisition_request_validation_missing_url_fails() {
    let args = json!({"mode": "fast"});
    let outcome = McpBrowserServer::parse_acquisition_request(&args);
    assert!(outcome.is_err(), "missing url should fail validation");
}
3083
/// A zero `total_timeout_secs` does not validate.
#[test]
fn acquisition_request_validation_invalid_timeout_fails() {
    let args = json!({
        "url": "https://example.com",
        "mode": "resilient",
        "total_timeout_secs": 0
    });
    let outcome = McpBrowserServer::parse_acquisition_request(&args);
    assert!(outcome.is_err(), "zero timeout should fail validation");
}
3094
/// The tool payload built from an `AcquisitionResult` always carries the
/// expected top-level keys, and `diagnostics.attempted` /
/// `diagnostics.failures` are arrays.
#[test]
fn acquisition_result_output_has_stable_top_level_shape() {
    let blocked = crate::StageFailure {
        strategy: crate::StrategyUsed::DirectHttp,
        kind: crate::StageFailureKind::Blocked,
        message: "blocked status".to_string(),
    };
    let result = AcquisitionResult {
        success: false,
        strategy_used: None,
        attempted: vec![crate::StrategyUsed::DirectHttp],
        final_url: Some("https://example.com".to_string()),
        status_code: Some(429),
        html_excerpt: Some("<html>blocked</html>".to_string()),
        extracted: None,
        failures: vec![blocked],
        timed_out: false,
        freshness: None,
        replay_defense: None,
        transport_realism: None,
        interstitial: None,
    };

    let payload = McpBrowserServer::acquisition_result_to_tool_output(&result);
    for key in [
        "success",
        "strategy_used",
        "final_url",
        "status_code",
        "html_excerpt",
        "diagnostics",
    ] {
        assert!(payload.get(key).is_some());
    }

    let diagnostics = payload.get("diagnostics");
    let is_array = |field: &str| {
        diagnostics
            .and_then(|d| d.get(field))
            .and_then(Value::as_array)
            .is_some()
    };
    assert!(
        is_array("attempted"),
        "diagnostics.attempted should be an array"
    );
    assert!(
        is_array("failures"),
        "diagnostics.failures should be an array"
    );
}
3141
/// A success response serializes its result and protocol version and omits
/// the `error` member.
#[test]
fn jsonrpc_response_ok_serializes() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let response = JsonRpcResponse::ok(json!(1), json!({ "hello": "world" }));
    let wire = serde_json::to_string(&response)?;
    for expected in ["\"hello\"", "\"jsonrpc\":\"2.0\""] {
        assert!(wire.contains(expected));
    }
    assert!(!wire.contains("\"error\""));
    Ok(())
}
3151
/// An error response serializes its code and message and omits the `result`
/// member.
#[test]
fn jsonrpc_response_err_serializes() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let response = JsonRpcResponse::err(json!(2), -32601, "Method not found");
    let wire = serde_json::to_string(&response)?;
    for expected in ["-32601", "Method not found"] {
        assert!(wire.contains(expected));
    }
    assert!(!wire.contains("\"result\""));
    Ok(())
}
3161
/// `browser_extract` must require `schema`, and that property must be
/// declared with type `object`.
#[test]
fn browser_extract_schema_parse_empty_schema()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let def = TOOL_DEFINITIONS
        .iter()
        .find(|tool| tool.get("name").and_then(Value::as_str) == Some("browser_extract"))
        .ok_or("browser_extract must be in TOOL_DEFINITIONS")?;
    let input_schema = def.get("inputSchema");

    let Some(required) = input_schema
        .and_then(|schema| schema.get("required"))
        .and_then(Value::as_array)
    else {
        return Err("browser_extract inputSchema missing 'required' array".into());
    };
    let schema_is_required = required.iter().any(|v| v == "schema");
    assert!(
        schema_is_required,
        "schema must be required in browser_extract"
    );

    let Some(schema_type) = input_schema
        .and_then(|schema| schema.get("properties"))
        .and_then(|props| props.get("schema"))
        .and_then(|prop| prop.get("type"))
        .and_then(Value::as_str)
    else {
        return Err("browser_extract inputSchema.properties.schema.type missing".into());
    };
    assert_eq!(
        schema_type, "object",
        "schema property must have type object"
    );
    Ok(())
}
3195
/// `browser_query` must declare `session_id` as required, so a call without
/// it is rejected by schema validation.
#[test]
fn browser_query_missing_session() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let def = TOOL_DEFINITIONS
        .iter()
        .find(|tool| tool.get("name").and_then(Value::as_str) == Some("browser_query"))
        .ok_or("browser_query must be in TOOL_DEFINITIONS")?;

    let Some(required) = def
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .and_then(Value::as_array)
    else {
        return Err("browser_query inputSchema missing 'required' array".into());
    };

    let session_required = required.iter().any(|v| v == "session_id");
    assert!(
        session_required,
        "session_id must be required so missing-session is caught at validation"
    );
    Ok(())
}
3222
/// Falsy, empty and unrecognised flag values leave MCP disabled.
#[test]
fn mcp_env_disabled_by_default() {
    for val in ["false", "0", "no", "", "off"] {
        let enabled = mcp_enabled_from(val);
        assert!(!enabled, "expected disabled for {val:?}");
    }
}
3231
/// Each accepted truthy spelling enables MCP, regardless of letter case.
#[test]
fn mcp_env_enabled_values() {
    for val in ["true", "True", "TRUE", "1", "yes", "YES"] {
        let enabled = mcp_enabled_from(val);
        assert!(enabled, "expected enabled for {val:?}");
    }
}
3239
3240 #[test]
3241 fn browser_warmup_in_tool_definitions() -> std::result::Result<(), Box<dyn std::error::Error>> {
3242 let defs = &*TOOL_DEFINITIONS;
3243 let def = defs
3244 .iter()
3245 .find(|t| t.get("name").and_then(|n| n.as_str()) == Some("browser_warmup"))
3246 .ok_or("browser_warmup must be in TOOL_DEFINITIONS")?;
3247 let required = def
3248 .get("inputSchema")
3249 .and_then(|s| s.get("required"))
3250 .and_then(|r| r.as_array())
3251 .ok_or("browser_warmup inputSchema missing 'required' array")?;
3252 assert!(
3253 required.iter().any(|v| v == "session_id"),
3254 "session_id must be required in browser_warmup"
3255 );
3256 assert!(
3257 required.iter().any(|v| v == "url"),
3258 "url must be required in browser_warmup"
3259 );
3260 Ok(())
3261 }
3262
3263 #[test]
3264 fn browser_refresh_in_tool_definitions() -> std::result::Result<(), Box<dyn std::error::Error>>
3265 {
3266 let defs = &*TOOL_DEFINITIONS;
3267 let def = defs
3268 .iter()
3269 .find(|t| t.get("name").and_then(|n| n.as_str()) == Some("browser_refresh"))
3270 .ok_or("browser_refresh must be in TOOL_DEFINITIONS")?;
3271 let required = def
3272 .get("inputSchema")
3273 .and_then(|s| s.get("required"))
3274 .and_then(|r| r.as_array())
3275 .ok_or("browser_refresh inputSchema missing 'required' array")?;
3276 assert!(
3277 required.iter().any(|v| v == "session_id"),
3278 "session_id must be required in browser_refresh"
3279 );
3280 Ok(())
3281 }
3282
3283 #[test]
3284 fn tool_defs_include_browser_auth_session() {
3285 let defs = &*TOOL_DEFINITIONS;
3286 assert!(
3287 defs.iter()
3288 .any(|t| t.get("name").and_then(|n| n.as_str()) == Some("browser_auth_session")),
3289 "TOOL_DEFINITIONS must contain browser_auth_session"
3290 );
3291 }
3292
3293 #[test]
3294 fn browser_auth_session_required_args() -> std::result::Result<(), Box<dyn std::error::Error>> {
3295 let defs = &*TOOL_DEFINITIONS;
3296 let def = defs
3297 .iter()
3298 .find(|t| t.get("name").and_then(|n| n.as_str()) == Some("browser_auth_session"))
3299 .ok_or("browser_auth_session must be in TOOL_DEFINITIONS")?;
3300 let required = def
3301 .get("inputSchema")
3302 .and_then(|s| s.get("required"))
3303 .and_then(Value::as_array)
3304 .ok_or("browser_auth_session inputSchema missing 'required' array")?;
3305
3306 assert!(
3307 required.iter().any(|v| v == "session_id"),
3308 "session_id must be required in browser_auth_session"
3309 );
3310 assert!(
3311 required.iter().any(|v| v == "mode"),
3312 "mode must be required in browser_auth_session"
3313 );
3314 Ok(())
3315 }
3316
3317 #[test]
3318 fn tool_defs_include_browser_session_save() {
3319 let defs = &*TOOL_DEFINITIONS;
3320 assert!(
3321 defs.iter()
3322 .any(|t| t.get("name").and_then(|n| n.as_str()) == Some("browser_session_save")),
3323 "TOOL_DEFINITIONS must contain browser_session_save"
3324 );
3325 }
3326
3327 #[test]
3328 fn tool_defs_include_browser_session_restore() {
3329 let defs = &*TOOL_DEFINITIONS;
3330 assert!(
3331 defs.iter()
3332 .any(|t| t.get("name").and_then(|n| n.as_str()) == Some("browser_session_restore")),
3333 "TOOL_DEFINITIONS must contain browser_session_restore"
3334 );
3335 }
3336
3337 #[test]
3338 fn tool_defs_include_browser_humanize() {
3339 let defs = &*TOOL_DEFINITIONS;
3340 assert!(
3341 defs.iter()
3342 .any(|t| t.get("name").and_then(|n| n.as_str()) == Some("browser_humanize")),
3343 "TOOL_DEFINITIONS must contain browser_humanize"
3344 );
3345 }
3346
3347 #[test]
3348 fn tool_defs_include_browser_apply_behavior_json() {
3349 let defs = &*TOOL_DEFINITIONS;
3350 assert!(
3351 defs.iter()
3352 .any(|t| t.get("name").and_then(|n| n.as_str())
3353 == Some("browser_apply_behavior_json")),
3354 "TOOL_DEFINITIONS must contain browser_apply_behavior_json"
3355 );
3356 }
3357
3358 #[test]
3359 fn browser_apply_behavior_json_requires_behavior()
3360 -> std::result::Result<(), Box<dyn std::error::Error>> {
3361 let defs = &*TOOL_DEFINITIONS;
3362 let def = defs
3363 .iter()
3364 .find(|t| t.get("name").and_then(|n| n.as_str()) == Some("browser_apply_behavior_json"))
3365 .ok_or("browser_apply_behavior_json must be in TOOL_DEFINITIONS")?;
3366 let required = def
3367 .get("inputSchema")
3368 .and_then(|s| s.get("required"))
3369 .and_then(Value::as_array)
3370 .ok_or("browser_apply_behavior_json inputSchema missing required array")?;
3371 assert!(
3372 required.iter().any(|v| v == "behavior"),
3373 "behavior must be required in browser_apply_behavior_json"
3374 );
3375 Ok(())
3376 }
3377
3378 #[cfg(feature = "mcp-attach")]
3379 #[test]
3380 fn tool_defs_include_browser_attach() {
3381 let defs = &*TOOL_DEFINITIONS;
3382 assert!(
3383 defs.iter()
3384 .any(|t| t.get("name").and_then(|n| n.as_str()) == Some("browser_attach")),
3385 "TOOL_DEFINITIONS must contain browser_attach when mcp-attach is enabled"
3386 );
3387 }
3388
3389 #[cfg(feature = "mcp-attach")]
3390 #[test]
3391 fn browser_attach_schema_includes_target_profile()
3392 -> std::result::Result<(), Box<dyn std::error::Error>> {
3393 let defs = &*TOOL_DEFINITIONS;
3394 let def = defs
3395 .iter()
3396 .find(|t| t.get("name").and_then(|n| n.as_str()) == Some("browser_attach"))
3397 .ok_or("browser_attach must be in TOOL_DEFINITIONS")?;
3398 let props = def
3399 .get("inputSchema")
3400 .and_then(|s| s.get("properties"))
3401 .and_then(Value::as_object)
3402 .ok_or("browser_attach inputSchema missing properties")?;
3403 let target_profile = props
3404 .get("target_profile")
3405 .ok_or("browser_attach inputSchema missing target_profile")?;
3406 let enum_values = target_profile
3407 .get("enum")
3408 .and_then(Value::as_array)
3409 .ok_or("browser_attach target_profile missing enum")?;
3410
3411 assert!(
3412 enum_values.iter().any(|v| v == "default"),
3413 "browser_attach target_profile enum must include default"
3414 );
3415 assert!(
3416 enum_values.iter().any(|v| v == "reddit"),
3417 "browser_attach target_profile enum must include reddit"
3418 );
3419 Ok(())
3420 }
3421}