1use std::{
73 collections::HashMap,
74 sync::{Arc, LazyLock},
75 time::Duration,
76};
77
78use chromiumoxide::Browser;
79use serde::{Deserialize, Serialize};
80use serde_json::{Value, json};
81use tokio::{
82 io::{AsyncBufReadExt, AsyncWriteExt, BufReader},
83 sync::Mutex,
84 task::JoinHandle,
85 time::sleep,
86};
87use tracing::{debug, info};
88use ulid::Ulid;
89
90#[cfg(feature = "mcp-attach")]
91use futures::StreamExt;
92
93use crate::{
94 AcquisitionMode, AcquisitionRequest, AcquisitionResult, AcquisitionRunner, BrowserConfig,
95 BrowserHandle, BrowserPool,
96 behavior::{InteractionLevel, InteractionSimulator},
97 behavior_adapter::{BehaviorInteractionLevel, PolymorphicBehaviorAdapter},
98 config::StealthLevel,
99 error::{BrowserError, Result},
100 page::WaitUntil,
101 session::{SessionSnapshot, restore_session, save_session},
102};
103
/// An incoming JSON-RPC request as read from the transport.
///
/// `params` and `id` are optional on the wire: thanks to `#[serde(default)]`
/// a missing field deserializes to `Value::Null`, which means an absent `id`
/// and an explicit `null` id are indistinguishable once the request is typed.
#[derive(Debug, Deserialize)]
pub struct JsonRpcRequest {
    /// Protocol version string sent by the client.
    pub jsonrpc: String,
    /// Name of the method to invoke (e.g. `tools/call`).
    pub method: String,
    /// Method parameters; `Value::Null` when the client omitted them.
    #[serde(default)]
    pub params: Value,
    /// Request identifier echoed back in the response; `Value::Null` when omitted.
    #[serde(default)]
    pub id: Value,
}
120
/// An outgoing JSON-RPC response.
///
/// `result` and `error` are omitted from the serialized output when `None`.
#[derive(Debug, Serialize)]
pub struct JsonRpcResponse {
    /// Protocol version tag written into every response.
    jsonrpc: &'static str,
    /// Success payload; skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    result: Option<Value>,
    /// Error payload; skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<JsonRpcError>,
    /// Identifier of the request this response answers.
    id: Value,
}
131
/// The `error` member of a JSON-RPC response.
#[derive(Debug, Serialize)]
pub struct JsonRpcError {
    /// Numeric error code (e.g. `-32601` for an unknown method).
    code: i32,
    /// Human-readable description of the failure.
    message: String,
    /// Optional extra detail; skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    data: Option<Value>,
}
140
141impl JsonRpcResponse {
142 const fn ok(id: Value, result: Value) -> Self {
143 Self {
144 jsonrpc: "2.0",
145 result: Some(result),
146 error: None,
147 id,
148 }
149 }
150
151 fn err(id: Value, code: i32, message: impl Into<String>) -> Self {
152 Self {
153 jsonrpc: "2.0",
154 result: None,
155 error: Some(JsonRpcError {
156 code,
157 message: message.into(),
158 data: None,
159 }),
160 id,
161 }
162 }
163
164 fn method_not_found(id: Value, method: &str) -> Self {
165 Self::err(id, -32601, format!("Method not found: {method}"))
166 }
167}
168
/// State the MCP server keeps for one open session.
///
/// Instances are stored in `McpBrowserServer::sessions`, keyed by session id.
struct McpSession {
    /// Browser handle obtained from the pool; `browser_acquire` stores `Some(handle)` here.
    handle: Arc<Mutex<Option<BrowserHandle>>>,
    /// Directly attached browser; `None` for sessions created by `browser_acquire`.
    /// NOTE(review): presumably populated by `browser_attach` — confirm.
    attached_browser: Arc<Mutex<Option<Browser>>>,
    /// Background task associated with an attached browser; `None` for pool sessions.
    /// NOTE(review): presumably the CDP event-handler loop — confirm.
    attached_handler_task: Arc<Mutex<Option<JoinHandle<()>>>>,
    /// The session's current page, if one exists; starts as `None` on acquire.
    page: Arc<Mutex<Option<crate::page::PageHandle>>>,
    /// Stealth level requested when the session was acquired.
    stealth_level: StealthLevel,
    /// Free-form TLS profile label supplied at acquire time, if any.
    tls_profile: Option<String>,
    /// Free-form WebRTC policy label supplied at acquire time, if any.
    webrtc_policy: Option<String>,
    /// CDP fix-mode label supplied at acquire time, if any.
    cdp_fix_mode: Option<String>,
    /// Proxy URL supplied at acquire time, if any.
    proxy: Option<String>,
    /// Target tuning profile; `browser_acquire` normalises it to "default" or "reddit".
    target_profile: String,
    /// URL most recently recorded for this session; `None` until a tool records one.
    current_url: Option<String>,
    /// In-memory session snapshot; `None` on acquire.
    /// NOTE(review): presumably written by the session-save tools — confirm.
    saved_snapshot: Option<SessionSnapshot>,
    /// Endpoint the session was attached to; `None` for pool-acquired sessions.
    attach_endpoint: Option<String>,
    /// Behavior plan associated with the session; `None` on acquire.
    /// NOTE(review): presumably set by `browser_apply_behavior_json` — confirm.
    behavior_plan: Option<crate::behavior_adapter::AppliedBehaviorPlan>,
}
206
/// Lazily built list of tool descriptors returned by the `tools/list` method.
///
/// Each entry is a JSON object with `name`, `description` and an `inputSchema`
/// (JSON Schema for the tool's `arguments`). Entries guarded by `#[cfg(feature = ...)]`
/// are only present when the corresponding Cargo feature is enabled.
static TOOL_DEFINITIONS: LazyLock<Vec<Value>> = LazyLock::new(|| {
    // Core session lifecycle and page tools.
    let mut tools = vec![
        json!({
            "name": "browser_acquire",
            "description": "Acquire a browser from the pool and open a session. The optional parameters are stored as session metadata labels and echoed back in the response; they do not reconfigure the pool-acquired browser at runtime. Use them to annotate sessions (e.g. for `browser_verify_stealth` attribution).",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "stealth_level": {
                        "type": "string",
                        "enum": ["none", "basic", "advanced"],
                        "description": "Anti-detection intensity. Defaults to 'advanced'."
                    },
                    "tls_profile": {
                        "type": "string",
                        "description": "TLS fingerprint profile label (free-form; requires stealth feature; browser-launch-level). Examples: chrome131, firefox133, safari18, edge131."
                    },
                    "webrtc_policy": {
                        "type": "string",
                        "description": "WebRTC IP-leak policy label (free-form; requires stealth feature; browser-launch-level). Examples: allow_all, disable_non_proxied, block_all."
                    },
                    "cdp_fix_mode": {
                        "type": "string",
                        "enum": ["addBinding", "isolatedWorld", "enableDisable", "none"],
                        "description": "CDP Runtime.enable leak-mitigation mode."
                    },
                    "proxy": {
                        "type": "string",
                        "description": "HTTP/SOCKS proxy URL, e.g. 'http://user:pass@host:port'. Only pass this when the user has explicitly requested proxy use or you have already acquired a proxy via proxy_acquire. Do NOT populate this field by default."
                    },
                    "target_profile": {
                        "type": "string",
                        "enum": ["default", "reddit"],
                        "description": "Optional target tuning profile. 'reddit' enables challenge-aware waits and stabilization tuned for Reddit flows."
                    }
                },
                "required": []
            }
        }),
        json!({
            "name": "browser_navigate",
            "description": "Navigate to a URL within a session. Opens a new page if needed.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "session_id": { "type": "string" },
                    "url": { "type": "string" },
                    "timeout_secs": { "type": "integer", "default": 30 }
                },
                "required": ["session_id", "url"]
            }
        }),
        json!({
            "name": "browser_acquire_and_extract",
            "description": "Run the opinionated acquisition ladder and return structured extraction/content output in one call. Uses AcquisitionRunner facade with deterministic strategy escalation.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "url": { "type": "string", "description": "Target URL to acquire." },
                    "mode": {
                        "type": "string",
                        "enum": ["fast", "resilient", "hostile", "investigate"],
                        "description": "Acquisition ladder mode."
                    },
                    "wait_for_selector": {
                        "type": "string",
                        "description": "Optional selector wait gate for browser-stage success."
                    },
                    "selector_wait": {
                        "type": "string",
                        "description": "Alias for wait_for_selector."
                    },
                    "extraction_js": {
                        "type": "string",
                        "description": "Optional JavaScript extraction expression evaluated in browser stages."
                    },
                    "total_timeout_secs": {
                        "type": "number",
                        "default": 45,
                        "description": "Optional wall-clock timeout for the full acquisition run."
                    }
                },
                "required": ["url", "mode"]
            }
        }),
        json!({
            "name": "browser_eval",
            "description": "Evaluate JavaScript in the current page of a session.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "session_id": { "type": "string" },
                    "script": { "type": "string" }
                },
                "required": ["session_id", "script"]
            }
        }),
        json!({
            "name": "browser_screenshot",
            "description": "Capture a full-page PNG screenshot. Returns base64-encoded PNG.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "session_id": { "type": "string" }
                },
                "required": ["session_id"]
            }
        }),
        json!({
            "name": "browser_content",
            "description": "Get the full HTML content of the current page.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "session_id": { "type": "string" }
                },
                "required": ["session_id"]
            }
        }),
        // Only advertised when the `mcp-attach` feature is compiled in.
        #[cfg(feature = "mcp-attach")]
        json!({
            "name": "browser_attach",
            "description": "Attach MCP workflows to an existing user browser/profile context. `cdp_ws` mode is implemented and creates a live attached session; `extension_bridge` remains a contract-only path.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "mode": {
                        "type": "string",
                        "enum": ["extension_bridge", "cdp_ws"],
                        "description": "Attach strategy. extension_bridge is the recommended future path for existing user profiles. cdp_ws targets a remote debugging websocket endpoint."
                    },
                    "endpoint": {
                        "type": "string",
                        "description": "Optional endpoint for cdp_ws mode, e.g. ws://127.0.0.1:9222/devtools/browser/<id>."
                    },
                    "profile_hint": {
                        "type": "string",
                        "description": "Optional human-readable profile label (e.g. 'reddit-main')."
                    },
                    "target_profile": {
                        "type": "string",
                        "enum": ["default", "reddit"],
                        "description": "Optional target tuning profile used by session navigation helpers."
                    }
                },
                "required": ["mode"]
            }
        }),
        json!({
            "name": "browser_auth_session",
            "description": "High-level auth/session workflow wrapper. Use mode='capture' to persist login state and mode='resume' to restore it.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "session_id": { "type": "string" },
                    "mode": { "type": "string", "enum": ["capture", "resume"] },
                    "file_path": { "type": "string", "description": "Optional snapshot file path for durable persistence." },
                    "ttl_secs": { "type": "integer", "description": "Optional TTL (seconds) when capturing." },
                    "navigate_to_origin": { "type": "boolean", "default": true, "description": "When resuming, navigate to snapshot origin before restore." },
                    "interaction_level": { "type": "string", "enum": ["none", "low", "medium", "high"], "default": "none", "description": "Optional post-operation human-like interaction step." }
                },
                "required": ["session_id", "mode"]
            }
        }),
        json!({
            "name": "browser_release",
            "description": "Release a browser session back to the pool.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "session_id": { "type": "string" }
                },
                "required": ["session_id"]
            }
        }),
        json!({
            "name": "pool_stats",
            "description": "Return current browser pool statistics.",
            "inputSchema": {
                "type": "object",
                "properties": {},
                "required": []
            }
        }),
    ];
    // Query and structured-extraction tools.
    tools.push(json!({
        "name": "browser_query",
        "description": "Navigate to a URL, query all elements matching a CSS selector, and return their text content or specific attributes. If `fields` is omitted each result is a plain string (the text content). If `fields` is supplied each result is an object with one key per field.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "url": { "type": "string" },
                "selector": { "type": "string", "description": "CSS selector passed to querySelectorAll." },
                "fields": {
                    "type": "object",
                    "description": "Map of output field name → { \"attr\": \"attribute-name\" }. Omit `attr` to get text content for that field.",
                    "additionalProperties": {
                        "type": "object",
                        "properties": { "attr": { "type": "string" } }
                    }
                },
                "limit": { "type": "integer", "default": 50, "description": "Maximum number of nodes to return." },
                "timeout_secs": { "type": "number", "default": 30 }
            },
            "required": ["session_id", "url", "selector"]
        }
    }));
    tools.push(json!({
        "name": "browser_extract",
        "description": "Navigate to a URL and perform schema-driven structured extraction. Each element matching `root_selector` becomes one result object; fields within each root are resolved by their own sub-selectors relative to the root. This is the runtime equivalent of the `#[derive(Extract)]` macro.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "url": { "type": "string" },
                "root_selector": { "type": "string", "description": "CSS selector whose matches become the root of each result object." },
                "schema": {
                    "type": "object",
                    "description": "Map of field name → { \"selector\": \"...\", \"attr\": \"...\", \"required\": true/false }.",
                    "additionalProperties": {
                        "type": "object",
                        "properties": {
                            "selector": { "type": "string" },
                            "attr": { "type": "string" },
                            "required": { "type": "boolean", "default": false }
                        },
                        "required": ["selector"]
                    }
                },
                "timeout_secs": { "type": "number", "default": 30 }
            },
            "required": ["session_id", "url", "root_selector", "schema"]
        }
    }));
    tools.push(json!({
        "name": "browser_extract_with_fallback",
        "description": "Like browser_extract but accepts multiple root selectors (tried in order). Returns the first selector that produces results. Useful when a site layout may have changed and you want to try modern markup before falling back to legacy selectors.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "url": { "type": "string" },
                "root_selectors": {
                    "type": "array",
                    "items": { "type": "string" },
                    "description": "CSS selectors tried in order; the first that produces results is used.",
                    "minItems": 1
                },
                "schema": {
                    "type": "object",
                    "description": "Map of field name → { \"selector\": \"...\", \"attr\": \"...\", \"required\": true/false }.",
                    "additionalProperties": {
                        "type": "object",
                        "properties": {
                            "selector": { "type": "string" },
                            "attr": { "type": "string" },
                            "required": { "type": "boolean", "default": false }
                        },
                        "required": ["selector"]
                    }
                },
                "timeout_secs": { "type": "number", "default": 30 }
            },
            "required": ["session_id", "url", "root_selectors", "schema"]
        }
    }));
    tools.push(json!({
        "name": "browser_extract_resilient",
        "description": "Like browser_extract but skips root nodes where *all* required schema fields are absent (partial records). Useful for heterogeneous lists where some items lack an optional field.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "url": { "type": "string" },
                "root_selector": { "type": "string", "description": "CSS selector whose matches become the root of each result object." },
                "schema": {
                    "type": "object",
                    "description": "Map of field name → { \"selector\": \"...\", \"attr\": \"...\", \"required\": true/false }.",
                    "additionalProperties": {
                        "type": "object",
                        "properties": {
                            "selector": { "type": "string" },
                            "attr": { "type": "string" },
                            "required": { "type": "boolean", "default": false }
                        },
                        "required": ["selector"]
                    }
                },
                "timeout_secs": { "type": "number", "default": 30 }
            },
            "required": ["session_id", "url", "root_selector", "schema"]
        }
    }));
    // Only advertised when the `similarity` feature is compiled in.
    #[cfg(feature = "similarity")]
    tools.push(json!({
        "name": "browser_find_similar",
        "description": "Navigate to a URL and find DOM elements that are structurally similar to a reference element (identified by a CSS selector). Useful when a site has been redesigned and stored selectors no longer match. Requires the `similarity` feature.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "url": { "type": "string" },
                "reference_selector": { "type": "string", "description": "CSS selector identifying the reference node. The first match is used." },
                "threshold": { "type": "number", "default": 0.7, "description": "Minimum similarity score [0.0, 1.0]." },
                "max_results": { "type": "integer", "default": 10 },
                "timeout_secs": { "type": "number", "default": 30 }
            },
            "required": ["session_id", "url", "reference_selector"]
        }
    }));
    // The two stealth tools below are only advertised when the `stealth` feature is compiled in.
    #[cfg(feature = "stealth")]
    tools.push(json!({
        "name": "browser_verify_stealth",
        "description": "Navigate to a URL and run built-in stealth checks with optional transport diagnostics (JA3/JA4/HTTP3). Returns a DiagnosticReport with pass/fail results, coverage percentage, transport mismatch details, and known_limitations for visible-but-not-yet-covered surfaces.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "url": { "type": "string", "description": "URL to navigate to before running checks." },
                "timeout_secs": { "type": "integer", "default": 15, "description": "Navigation timeout in seconds." },
                "observed_ja3_hash": { "type": "string", "description": "Optional observed JA3 hash to compare against expected profile." },
                "observed_ja4": { "type": "string", "description": "Optional observed JA4 fingerprint to compare against expected profile." },
                "observed_http3_perk_text": { "type": "string", "description": "Optional observed HTTP/3 perk text (SETTINGS|PSEUDO_HEADERS)." },
                "observed_http3_perk_hash": { "type": "string", "description": "Optional observed HTTP/3 perk hash." }
            },
            "required": ["session_id", "url"]
        }
    }));
    #[cfg(feature = "stealth")]
    tools.push(json!({
        "name": "browser_validate_stealth",
        "description": "Run anti-bot service validators against the pool (Tier 1: CreepJS, BrowserScan). Returns a summary report.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "targets": {
                    "type": "array",
                    "items": { "type": "string", "enum": ["creepjs", "browserscan", "fingerprint_js", "kasada", "cloudflare", "akamai", "data_dome", "perimeter_x"] },
                    "description": "List of services to validate. Empty = Tier 1 only. Tier 2+ tests may rate-limit.",
                    "default": ["creepjs", "browserscan"]
                },
                "tier1_only": {
                    "type": "boolean",
                    "default": false,
                    "description": "If true, force regression-safe Tier 1 targets only (CreepJS + BrowserScan)."
                },
                "timeout_secs": { "type": "integer", "default": 30, "description": "Per-target timeout in seconds." }
            },
            "required": []
        }
    }));
    // Session maintenance tools: warmup, refresh, save/restore, humanize, behavior plans.
    tools.push(json!({
        "name": "browser_warmup",
        "description": "Warm up a browser session by navigating to a URL and optionally waiting for dynamic resources to settle. Warmup is idempotent — calling it again re-warms the same session.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "url": { "type": "string", "description": "URL to navigate to during warmup." },
                "wait": {
                    "type": "string",
                    "enum": ["dom_content_loaded", "network_idle"],
                    "default": "dom_content_loaded",
                    "description": "Wait strategy after navigation."
                },
                "timeout_ms": { "type": "integer", "default": 30000, "description": "Navigation timeout in milliseconds." },
                "stabilize_ms": { "type": "integer", "default": 0, "description": "Additional pause after navigation for dynamic resources to settle (0 = skip)." }
            },
            "required": ["session_id", "url"]
        }
    }));
    tools.push(json!({
        "name": "browser_refresh",
        "description": "Refresh the current page while retaining cookies and session storage. Optionally re-navigates to force a new TCP connection.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "wait": {
                    "type": "string",
                    "enum": ["dom_content_loaded", "network_idle"],
                    "default": "dom_content_loaded",
                    "description": "Wait strategy after reload."
                },
                "timeout_ms": { "type": "integer", "default": 30000, "description": "Reload timeout in milliseconds." },
                "reset_connection": { "type": "boolean", "default": false, "description": "When true, re-navigates to force a new TCP connection instead of in-place reload." }
            },
            "required": ["session_id"]
        }
    }));
    tools.push(json!({
        "name": "browser_session_save",
        "description": "Save current browser session state (cookies + localStorage) to memory and optionally to disk.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "ttl_secs": { "type": "integer", "description": "Optional snapshot TTL in seconds." },
                "file_path": { "type": "string", "description": "Optional path to save session snapshot JSON." },
                "include_snapshot": { "type": "boolean", "default": false, "description": "When true, include full snapshot payload in response." }
            },
            "required": ["session_id"]
        }
    }));
    tools.push(json!({
        "name": "browser_session_restore",
        "description": "Restore browser session state from provided snapshot JSON, saved in-memory snapshot, or file.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "snapshot": { "type": "object", "description": "Inline SessionSnapshot JSON." },
                "file_path": { "type": "string", "description": "Path to a SessionSnapshot JSON file." },
                "use_saved": { "type": "boolean", "default": true, "description": "Use in-memory snapshot when no inline/file snapshot is provided." },
                "navigate_to_origin": { "type": "boolean", "default": true, "description": "Navigate to snapshot origin before restore when origin is present." }
            },
            "required": ["session_id"]
        }
    }));
    tools.push(json!({
        "name": "browser_humanize",
        "description": "Apply human-like interaction sequence on current page (scroll, key activity, mouse movement).",
        "inputSchema": {
            "type": "object",
            "properties": {
                "session_id": { "type": "string" },
                "level": { "type": "string", "enum": ["none", "low", "medium", "high"], "default": "low" },
                "viewport_width": { "type": "number", "default": 1366.0 },
                "viewport_height": { "type": "number", "default": 768.0 }
            },
            "required": ["session_id"]
        }
    }));
    tools.push(json!({
        "name": "browser_apply_behavior_json",
        "description": "Apply structured behavior JSON (runtime policy, investigation bundle, or direct overrides) using the polymorphic behavior adapter. Returns an applied plan and effective browser config. If session_id is provided, session metadata is updated for downstream tools.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "behavior": {
                    "type": "object",
                    "description": "Structured behavior input: RuntimePolicy object, InvestigationBundle object with nested policy, or direct override object."
                },
                "session_id": {
                    "type": "string",
                    "description": "Optional active session to annotate with the applied behavior plan."
                }
            },
            "required": ["behavior"]
        }
    }));
    tools
});
683
/// MCP server that exposes a [`BrowserPool`] as JSON-RPC tools.
pub struct McpBrowserServer {
    /// Shared pool that sessions acquire browsers from.
    pool: Arc<BrowserPool>,
    /// Open sessions, keyed by the session id returned from `browser_acquire`.
    sessions: Arc<Mutex<HashMap<String, McpSession>>>,
}
688
/// One field definition for structured extraction.
///
/// NOTE(review): the fields mirror the per-field `schema` entries advertised
/// by the `browser_extract*` tools (`selector`, `attr`, `required`) — confirm
/// against the parsing code.
struct ExtractFieldDef {
    /// CSS selector used to locate the field's element.
    selector: String,
    /// Attribute to read; presumably text content is used when `None` — confirm.
    attr: Option<String>,
    /// Whether the field is marked as required in the schema.
    required: bool,
}
695
696impl McpBrowserServer {
697 pub fn new(pool: Arc<BrowserPool>) -> Self {
701 Self {
702 pool,
703 sessions: Arc::new(Mutex::new(HashMap::new())),
704 }
705 }
706
707 pub async fn run(&self) -> Result<()> {
716 info!("MCP browser server starting (stdin/stdout mode)");
717
718 let stdin = tokio::io::stdin();
719 let stdout = tokio::io::stdout();
720 let mut reader = BufReader::new(stdin).lines();
721 let mut stdout = stdout;
722
723 while let Some(line) = reader.next_line().await.map_err(BrowserError::Io)? {
724 let line = line.trim().to_string();
725 if line.is_empty() {
726 continue;
727 }
728
729 debug!(?line, "MCP request");
730
731 let response = match serde_json::from_str::<Value>(&line) {
732 Ok(req) => {
733 let is_well_formed_notification = req.is_object()
734 && req.get("jsonrpc").and_then(Value::as_str) == Some("2.0")
735 && req.get("id").is_none()
736 && req.get("method").and_then(Value::as_str).is_some();
737 let response = self.dispatch(&req).await;
738 if is_well_formed_notification {
739 continue;
740 }
741 response
742 }
743 Err(e) => serde_json::to_value(JsonRpcResponse::err(
744 Value::Null,
745 -32700,
746 format!("Parse error: {e}"),
747 ))
748 .unwrap_or_else(|_| {
749 json!({"jsonrpc":"2.0","id":null,"error":{"code":-32603,"message":"Internal error"}})
750 }),
751 };
752
753 let mut out = serde_json::to_string(&response).unwrap_or_default();
754 out.push('\n');
755 stdout
756 .write_all(out.as_bytes())
757 .await
758 .map_err(BrowserError::Io)?;
759 stdout.flush().await.map_err(BrowserError::Io)?;
760 }
761
762 info!("MCP browser server stopping (stdin closed)");
763 Ok(())
764 }
765
766 pub async fn dispatch(&self, req: &Value) -> Value {
789 let typed: JsonRpcRequest = match serde_json::from_value(req.clone()) {
790 Ok(r) => r,
791 Err(e) => {
792 return json!({
793 "jsonrpc": "2.0",
794 "id": req.get("id").cloned().unwrap_or(Value::Null),
795 "error": { "code": -32700, "message": format!("Parse error: {e}") }
796 });
797 }
798 };
799 let resp = self.handle_request(typed).await;
800 serde_json::to_value(resp).unwrap_or_else(|_| json!({"jsonrpc":"2.0","id":null,"error":{"code":-32603,"message":"Internal error"}}))
801 }
802
803 async fn handle_request(&self, req: JsonRpcRequest) -> JsonRpcResponse {
804 let id = req.id.clone();
805 match req.method.as_str() {
806 "initialize" => Self::handle_initialize(id),
807 "tools/list" => Self::handle_tools_list(id),
808 "tools/call" => self.handle_tools_call(id, req.params).await,
809 "resources/list" => self.handle_resources_list(id).await,
810 "resources/read" => self.handle_resources_read(id, req.params).await,
811 "notifications/initialized" | "ping" => {
812 JsonRpcResponse::ok(id, json!({}))
814 }
815 other => JsonRpcResponse::method_not_found(id, other),
816 }
817 }
818
819 fn handle_initialize(id: Value) -> JsonRpcResponse {
822 JsonRpcResponse::ok(
823 id,
824 json!({
825 "protocolVersion": "2025-11-25",
826 "capabilities": {
827 "tools": { "listChanged": false },
828 "resources": { "listChanged": false, "subscribe": false }
829 },
830 "serverInfo": {
831 "name": "stygian-browser",
832 "version": env!("CARGO_PKG_VERSION")
833 }
834 }),
835 )
836 }
837
838 fn handle_tools_list(id: Value) -> JsonRpcResponse {
841 JsonRpcResponse::ok(id, json!({ "tools": &*TOOL_DEFINITIONS }))
842 }
843
844 async fn handle_tools_call(&self, id: Value, params: Value) -> JsonRpcResponse {
847 let name = match params.get("name").and_then(|v| v.as_str()) {
848 Some(n) => n.to_string(),
849 None => return JsonRpcResponse::err(id, -32602, "Missing tool 'name'"),
850 };
851 let args = params
852 .get("arguments")
853 .cloned()
854 .unwrap_or_else(|| json!({}));
855
856 let result = match name.as_str() {
857 "browser_acquire" => self.tool_browser_acquire(&args).await,
858 "browser_acquire_and_extract" => self.tool_browser_acquire_and_extract(&args).await,
859 "browser_navigate" => self.tool_browser_navigate(&args).await,
860 "browser_eval" => self.tool_browser_eval(&args).await,
861 "browser_screenshot" => self.tool_browser_screenshot(&args).await,
862 "browser_content" => self.tool_browser_content(&args).await,
863 #[cfg(feature = "mcp-attach")]
864 "browser_attach" => self.tool_browser_attach(&args).await,
865 #[cfg(not(feature = "mcp-attach"))]
866 "browser_attach" => Err(BrowserError::ConfigError(
867 "browser_attach requires the 'mcp-attach' feature".to_string(),
868 )),
869 "browser_auth_session" => self.tool_browser_auth_session(&args).await,
870 "browser_session_save" => self.tool_browser_session_save(&args).await,
871 "browser_session_restore" => self.tool_browser_session_restore(&args).await,
872 "browser_apply_behavior_json" => self.tool_browser_apply_behavior_json(&args).await,
873 "browser_humanize" => self.tool_browser_humanize(&args).await,
874 #[cfg(feature = "stealth")]
875 "browser_verify_stealth" => self.tool_browser_verify_stealth(&args).await,
876 #[cfg(not(feature = "stealth"))]
877 "browser_verify_stealth" => Err(BrowserError::ConfigError(
878 "browser_verify_stealth requires the 'stealth' feature".to_string(),
879 )),
880 #[cfg(feature = "stealth")]
881 "browser_validate_stealth" => self.tool_browser_validate_stealth(&args).await,
882 #[cfg(not(feature = "stealth"))]
883 "browser_validate_stealth" => Err(BrowserError::ConfigError(
884 "browser_validate_stealth requires the 'stealth' feature".to_string(),
885 )),
886 "browser_release" => self.tool_browser_release(&args).await,
887 "pool_stats" => Ok(self.tool_pool_stats()),
888 "browser_query" => self.tool_browser_query(&args).await,
889 "browser_extract" => self.tool_browser_extract(&args).await,
890 "browser_extract_with_fallback" => self.tool_browser_extract_with_fallback(&args).await,
891 "browser_extract_resilient" => self.tool_browser_extract_resilient(&args).await,
892 #[cfg(feature = "similarity")]
893 "browser_find_similar" => self.tool_browser_find_similar(&args).await,
894 "browser_warmup" => self.tool_browser_warmup(&args).await,
895 "browser_refresh" => self.tool_browser_refresh(&args).await,
896 other => Err(BrowserError::ConfigError(format!("Unknown tool: {other}"))),
897 };
898
899 match result {
900 Ok(content) => JsonRpcResponse::ok(
901 id,
902 json!({ "content": [{ "type": "text", "text": content.to_string() }], "isError": false }),
903 ),
904 Err(e) => JsonRpcResponse::ok(
905 id,
906 json!({ "content": [{ "type": "text", "text": e.to_string() }], "isError": true }),
907 ),
908 }
909 }
910
911 async fn tool_browser_acquire(&self, args: &Value) -> Result<Value> {
912 let stealth_level = args
914 .get("stealth_level")
915 .and_then(|v| v.as_str())
916 .map(|s| match s {
917 "none" => StealthLevel::None,
918 "basic" => StealthLevel::Basic,
919 _ => StealthLevel::Advanced,
920 })
921 .unwrap_or_default();
922 let tls_profile = args
923 .get("tls_profile")
924 .and_then(|v| v.as_str())
925 .map(ToString::to_string);
926 let webrtc_policy = args
927 .get("webrtc_policy")
928 .and_then(|v| v.as_str())
929 .map(ToString::to_string);
930 let cdp_fix_mode = args
931 .get("cdp_fix_mode")
932 .and_then(|v| v.as_str())
933 .map(ToString::to_string);
934 let proxy = args
935 .get("proxy")
936 .and_then(|v| v.as_str())
937 .map(ToString::to_string);
938 let target_profile = args
939 .get("target_profile")
940 .and_then(|v| v.as_str())
941 .map_or_else(
942 || "default".to_string(),
943 |s| {
944 if s.eq_ignore_ascii_case("reddit") {
945 "reddit".to_string()
946 } else {
947 "default".to_string()
948 }
949 },
950 );
951
952 let handle = self.pool.acquire().await?;
953 let session_id = Ulid::new().to_string();
954
955 let effective_stealth = format!("{stealth_level:?}").to_lowercase();
956 self.sessions.lock().await.insert(
957 session_id.clone(),
958 McpSession {
959 handle: Arc::new(Mutex::new(Some(handle))),
960 attached_browser: Arc::new(Mutex::new(None)),
961 attached_handler_task: Arc::new(Mutex::new(None)),
962 page: Arc::new(Mutex::new(None)),
963 stealth_level,
964 tls_profile: tls_profile.clone(),
965 webrtc_policy: webrtc_policy.clone(),
966 cdp_fix_mode: cdp_fix_mode.clone(),
967 proxy: proxy.clone(),
968 target_profile: target_profile.clone(),
969 current_url: None,
970 saved_snapshot: None,
971 attach_endpoint: None,
972 behavior_plan: None,
973 },
974 );
975
976 info!(%session_id, %effective_stealth, "MCP session acquired");
977 Ok(json!({
978 "session_id": session_id,
979 "requested_metadata": {
980 "stealth_level": effective_stealth,
981 "tls_profile": tls_profile,
982 "webrtc_policy": webrtc_policy,
983 "cdp_fix_mode": cdp_fix_mode,
984 "proxy": proxy,
985 "target_profile": target_profile
986 }
987 }))
988 }
989
990 async fn tool_browser_acquire_and_extract(&self, args: &Value) -> Result<Value> {
991 let request = Self::parse_acquisition_request(args)?;
992 let runner = AcquisitionRunner::new(self.pool.clone());
993 let result = runner.run(request).await;
994 Ok(Self::acquisition_result_to_tool_output(&result))
995 }
996
/// Navigates the session page to `url`, runs the stealth diagnostic, and
/// annotates the report with the session's requested stealth level.
///
/// Arguments read from `args`:
/// - `session_id` (string, required)
/// - `url` (string, required)
/// - `timeout_secs` (unsigned integer, optional, default `15`)
/// - `observed_ja3_hash`, `observed_ja4`, `observed_http3_perk_text`,
///   `observed_http3_perk_hash` (strings, optional) — forwarded to the
///   diagnostic as `TransportObservations`.
///
/// The session's `current_url` is updated to `url` once navigation succeeds,
/// whether or not the diagnostic itself succeeds.
///
/// # Errors
/// Propagates errors from argument parsing, session lookup,
/// `ensure_session_page`, navigation and the diagnostic; returns
/// `BrowserError::ConfigError` when the session's page slot is empty.
#[cfg(feature = "stealth")]
async fn tool_browser_verify_stealth(&self, args: &Value) -> Result<Value> {
    // Shared lookup for the optional string-valued observation arguments.
    let opt_string = |key: &str| -> Option<String> {
        args.get(key).and_then(Value::as_str).map(str::to_owned)
    };

    let session_id = Self::require_str(args, "session_id")?;
    let url = Self::require_str(args, "url")?;
    let timeout = Duration::from_secs(
        args.get("timeout_secs")
            .and_then(Value::as_u64)
            .unwrap_or(15),
    );
    let observed = crate::diagnostic::TransportObservations {
        ja3_hash: opt_string("observed_ja3_hash"),
        ja4: opt_string("observed_ja4"),
        http3_perk_text: opt_string("observed_http3_perk_text"),
        http3_perk_hash: opt_string("observed_http3_perk_hash"),
    };

    let (handle_slot, browser_slot, page_slot, _, reddit_profile) =
        self.session_runtime(&session_id).await?;
    let requested_stealth = self.session_handle_and_stealth(&session_id).await?.1;

    self.ensure_session_page(
        &session_id,
        &handle_slot,
        &browser_slot,
        &page_slot,
        None,
        timeout,
        reddit_profile,
    )
    .await?;

    // Navigate under a mutable page lock, released before the diagnostic.
    {
        let mut guard = page_slot.lock().await;
        let Some(page) = guard.as_mut() else {
            return Err(BrowserError::ConfigError(format!(
                "Session page unavailable: {session_id}"
            )));
        };
        Self::navigate_with_profile(page, &url, timeout, reddit_profile).await?;
        drop(guard);
    }

    // The diagnostic only needs shared access to the page.
    let mut report = {
        let guard = page_slot.lock().await;
        let Some(page) = guard.as_ref() else {
            return Err(BrowserError::ConfigError(format!(
                "Session page unavailable: {session_id}"
            )));
        };
        let outcome = Self::run_stealth_diagnostic(page, observed).await;
        drop(guard);
        outcome
    };

    if let Some(entry) = self.sessions.lock().await.get_mut(&session_id) {
        entry.current_url = Some(url);
    }

    // Only a successful, object-shaped report gets the extra field.
    if let Ok(Value::Object(fields)) = &mut report {
        fields.insert(
            "requested_stealth_level".to_string(),
            Value::String(requested_stealth),
        );
    }
    report
}
1079
/// Runs `verify_stealth_with_transport` on `page` with the supplied transport
/// observations and serializes the resulting report to JSON.
///
/// # Errors
/// Propagates the verification error; a serialization failure is reported as
/// `BrowserError::ConfigError`.
#[cfg(feature = "stealth")]
async fn run_stealth_diagnostic(
    page: &crate::page::PageHandle,
    observed: crate::diagnostic::TransportObservations,
) -> Result<Value> {
    let report = page.verify_stealth_with_transport(Some(observed)).await?;
    match serde_json::to_value(&report) {
        Ok(serialized) => Ok(serialized),
        Err(e) => Err(BrowserError::ConfigError(format!(
            "failed to serialize report: {e}"
        ))),
    }
}
1089
1090 async fn tool_browser_navigate(&self, args: &Value) -> Result<Value> {
1091 let session_id = Self::require_str(args, "session_id")?;
1092 let url = Self::require_str(args, "url")?;
1093 let timeout_secs = args
1094 .get("timeout_secs")
1095 .and_then(serde_json::Value::as_f64)
1096 .unwrap_or(30.0);
1097
1098 let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
1099 self.session_runtime(&session_id).await?;
1100
1101 self.ensure_session_page(
1102 &session_id,
1103 &session_arc,
1104 &attached_browser_arc,
1105 &page_arc,
1106 None,
1107 Duration::from_secs_f64(timeout_secs),
1108 reddit_profile,
1109 )
1110 .await?;
1111
1112 let (challenge_detected, challenge_cleared, title) = {
1113 let mut page_guard = page_arc.lock().await;
1114 let page = page_guard.as_mut().ok_or_else(|| {
1115 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1116 })?;
1117
1118 let (challenge_detected, challenge_cleared) = Self::navigate_with_profile(
1119 page,
1120 &url,
1121 Duration::from_secs_f64(timeout_secs),
1122 reddit_profile,
1123 )
1124 .await?;
1125 let title = page.title().await.unwrap_or_default();
1126 drop(page_guard);
1127 (challenge_detected, challenge_cleared, title)
1128 };
1129
1130 let current_url = url.clone();
1131
1132 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
1135 session.current_url = Some(current_url.clone());
1136 }
1137
1138 Ok(json!({
1139 "title": title,
1140 "url": current_url,
1141 "challenge_detected": challenge_detected,
1142 "challenge_cleared": challenge_cleared
1143 }))
1144 }
1145
1146 async fn tool_browser_eval(&self, args: &Value) -> Result<Value> {
1147 let session_id = Self::require_str(args, "session_id")?;
1148 let script = Self::require_str(args, "script")?;
1149 let timeout_secs = args
1150 .get("timeout_secs")
1151 .and_then(serde_json::Value::as_f64)
1152 .unwrap_or(30.0);
1153
1154 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1155 self.session_runtime(&session_id).await?;
1156 let nav_url = nav_url_opt.ok_or_else(|| {
1157 BrowserError::ConfigError(
1158 "No page loaded — call browser_navigate before browser_eval".to_string(),
1159 )
1160 })?;
1161
1162 self.ensure_session_page(
1163 &session_id,
1164 &session_arc,
1165 &attached_browser_arc,
1166 &page_arc,
1167 Some(nav_url.as_str()),
1168 Duration::from_secs_f64(timeout_secs),
1169 reddit_profile,
1170 )
1171 .await?;
1172
1173 let mut page_guard = page_arc.lock().await;
1174 let page = page_guard.as_mut().ok_or_else(|| {
1175 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1176 })?;
1177 let result: Value = page.eval(&script).await?;
1178 drop(page_guard);
1179
1180 Ok(json!({ "result": result }))
1181 }
1182
1183 async fn tool_browser_screenshot(&self, args: &Value) -> Result<Value> {
1184 use base64::Engine as _;
1185 let session_id = Self::require_str(args, "session_id")?;
1186 let timeout_secs = args
1187 .get("timeout_secs")
1188 .and_then(serde_json::Value::as_f64)
1189 .unwrap_or(30.0);
1190
1191 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1192 self.session_runtime(&session_id).await?;
1193 let nav_url = nav_url_opt.ok_or_else(|| {
1194 BrowserError::ConfigError(
1195 "No page loaded — call browser_navigate before browser_screenshot".to_string(),
1196 )
1197 })?;
1198
1199 self.ensure_session_page(
1200 &session_id,
1201 &session_arc,
1202 &attached_browser_arc,
1203 &page_arc,
1204 Some(nav_url.as_str()),
1205 Duration::from_secs_f64(timeout_secs),
1206 reddit_profile,
1207 )
1208 .await?;
1209
1210 let mut page_guard = page_arc.lock().await;
1211 let page = page_guard.as_mut().ok_or_else(|| {
1212 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1213 })?;
1214 let png_bytes = page.screenshot().await?;
1215 drop(page_guard);
1216
1217 let encoded = base64::engine::general_purpose::STANDARD.encode(&png_bytes);
1218 Ok(json!({ "data": encoded, "mimeType": "image/png", "bytes": png_bytes.len() }))
1219 }
1220
1221 async fn tool_browser_content(&self, args: &Value) -> Result<Value> {
1222 let session_id = Self::require_str(args, "session_id")?;
1223 let timeout_secs = args
1224 .get("timeout_secs")
1225 .and_then(serde_json::Value::as_f64)
1226 .unwrap_or(30.0);
1227
1228 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1229 self.session_runtime(&session_id).await?;
1230 let nav_url = nav_url_opt.ok_or_else(|| {
1231 BrowserError::ConfigError(
1232 "No page loaded — call browser_navigate before browser_content".to_string(),
1233 )
1234 })?;
1235
1236 self.ensure_session_page(
1237 &session_id,
1238 &session_arc,
1239 &attached_browser_arc,
1240 &page_arc,
1241 Some(nav_url.as_str()),
1242 Duration::from_secs_f64(timeout_secs),
1243 reddit_profile,
1244 )
1245 .await?;
1246
1247 let mut page_guard = page_arc.lock().await;
1248 let page = page_guard.as_mut().ok_or_else(|| {
1249 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1250 })?;
1251 let html = page.content().await?;
1252 drop(page_guard);
1253
1254 Ok(json!({ "html": html, "bytes": html.len() }))
1255 }
1256
/// Attaches to an externally running browser and registers it as a new session.
///
/// Arguments read from `args`:
/// - `mode` (string, required): `"cdp_ws"` or `"extension_bridge"`.
/// - `endpoint` (string): required for `cdp_ws`; must start with `ws://` or `wss://`.
/// - `profile_hint` (string, optional): echoed back in the output.
/// - `target_profile` (string, optional): `"reddit"` (case-insensitive) is
///   kept as `"reddit"`; any other value, or none, becomes `"default"`.
///
/// `extension_bridge` only returns a `not_implemented` payload. `cdp_ws`
/// connects with a 10 second timeout, spawns a task that drains the
/// connection handler until it ends or yields an error, and stores a new
/// session keyed by a fresh ULID.
///
/// # Errors
/// `BrowserError::ConfigError` for an unknown mode or a missing/invalid
/// endpoint, `BrowserError::Timeout` when the connection attempt times out,
/// and `BrowserError::ConnectionError` when the connection fails.
#[cfg(feature = "mcp-attach")]
async fn tool_browser_attach(&self, args: &Value) -> Result<Value> {
    let mode = Self::require_str(args, "mode")?;
    let opt_string = |key: &str| -> Option<String> {
        args.get(key).and_then(Value::as_str).map(str::to_owned)
    };
    let endpoint = opt_string("endpoint");
    let profile_hint = opt_string("profile_hint");

    let wants_reddit = args
        .get("target_profile")
        .and_then(Value::as_str)
        .is_some_and(|name| name.eq_ignore_ascii_case("reddit"));
    let target_profile = String::from(if wants_reddit { "reddit" } else { "default" });

    if mode == "extension_bridge" {
        return Ok(json!({
            "supported": false,
            "mode": mode,
            "profile_hint": profile_hint,
            "status": "not_implemented",
            "next_step": "Implement extension bridge handshake and profile transfer"
        }));
    }
    if mode != "cdp_ws" {
        return Err(BrowserError::ConfigError(format!(
            "Invalid mode '{mode}'. Use one of: extension_bridge, cdp_ws"
        )));
    }

    // From here on the mode is `cdp_ws`.
    let Some(endpoint) = endpoint else {
        return Err(BrowserError::ConfigError(
            "missing endpoint for cdp_ws mode".to_string(),
        ));
    };
    let has_ws_scheme = ["ws://", "wss://"]
        .iter()
        .any(|scheme| endpoint.starts_with(*scheme));
    if !has_ws_scheme {
        return Err(BrowserError::ConfigError(
            "endpoint must start with ws:// or wss://".to_string(),
        ));
    }

    let attach_timeout = Duration::from_secs(10);
    let connect_outcome =
        tokio::time::timeout(attach_timeout, Browser::connect(endpoint.clone())).await;
    let (browser, mut handler) = match connect_outcome {
        Ok(Ok(connected)) => connected,
        Ok(Err(e)) => {
            return Err(BrowserError::ConnectionError {
                url: endpoint.clone(),
                reason: e.to_string(),
            });
        }
        Err(_) => {
            return Err(BrowserError::Timeout {
                operation: "Browser.connect".to_string(),
                duration_ms: 10_000,
            });
        }
    };

    // Keep polling the handler in the background; stop on the first error.
    let handler_task = tokio::spawn(async move {
        loop {
            match handler.next().await {
                Some(Ok(_)) => {}
                Some(Err(error)) => {
                    tracing::warn!(%error, "attached browser handler error");
                    break;
                }
                None => break,
            }
        }
    });

    let session_id = Ulid::new().to_string();
    let new_session = McpSession {
        handle: Arc::new(Mutex::new(None)),
        attached_browser: Arc::new(Mutex::new(Some(browser))),
        attached_handler_task: Arc::new(Mutex::new(Some(handler_task))),
        page: Arc::new(Mutex::new(None)),
        stealth_level: StealthLevel::None,
        tls_profile: None,
        webrtc_policy: None,
        cdp_fix_mode: None,
        proxy: None,
        target_profile: target_profile.clone(),
        current_url: None,
        saved_snapshot: None,
        attach_endpoint: Some(endpoint.clone()),
        behavior_plan: None,
    };
    self.sessions
        .lock()
        .await
        .insert(session_id.clone(), new_session);

    Ok(json!({
        "supported": true,
        "mode": "cdp_ws",
        "session_id": session_id,
        "endpoint": endpoint,
        "profile_hint": profile_hint,
        "requested_metadata": {
            "target_profile": target_profile
        }
    }))
}
1360
1361 async fn tool_browser_auth_session(&self, args: &Value) -> Result<Value> {
1362 let session_id = Self::require_str(args, "session_id")?;
1363 let mode = Self::require_str(args, "mode")?;
1364 let file_path = args
1365 .get("file_path")
1366 .and_then(Value::as_str)
1367 .map(ToString::to_string);
1368 let ttl_secs = args.get("ttl_secs").and_then(Value::as_u64);
1369 let navigate_to_origin = args
1370 .get("navigate_to_origin")
1371 .and_then(Value::as_bool)
1372 .unwrap_or(true);
1373 let interaction_level = args
1374 .get("interaction_level")
1375 .and_then(Value::as_str)
1376 .unwrap_or("none")
1377 .to_string();
1378
1379 let payload = match mode.as_str() {
1380 "capture" => {
1381 let mut save_args = json!({
1382 "session_id": session_id,
1383 "include_snapshot": false
1384 });
1385 if let Some(ttl) = ttl_secs
1386 && let Some(obj) = save_args.as_object_mut()
1387 {
1388 obj.insert("ttl_secs".to_string(), Value::from(ttl));
1389 }
1390 if let Some(path) = file_path.clone()
1391 && let Some(obj) = save_args.as_object_mut()
1392 {
1393 obj.insert("file_path".to_string(), Value::String(path));
1394 }
1395
1396 let save = self.tool_browser_session_save(&save_args).await?;
1397
1398 let humanize = if interaction_level == "none" {
1399 None
1400 } else {
1401 let humanize_args = json!({
1402 "session_id": session_id,
1403 "level": interaction_level
1404 });
1405 Some(self.tool_browser_humanize(&humanize_args).await?)
1406 };
1407
1408 json!({
1409 "mode": "capture",
1410 "session_id": session_id,
1411 "save": save,
1412 "humanize": humanize
1413 })
1414 }
1415 "resume" => {
1416 let mut restore_args = json!({
1417 "session_id": session_id,
1418 "use_saved": file_path.is_none(),
1419 "navigate_to_origin": navigate_to_origin
1420 });
1421 if let Some(path) = file_path.clone()
1422 && let Some(obj) = restore_args.as_object_mut()
1423 {
1424 obj.insert("file_path".to_string(), Value::String(path));
1425 }
1426
1427 let restore = self.tool_browser_session_restore(&restore_args).await?;
1428
1429 let humanize = if interaction_level == "none" {
1430 None
1431 } else {
1432 let humanize_args = json!({
1433 "session_id": session_id,
1434 "level": interaction_level
1435 });
1436 Some(self.tool_browser_humanize(&humanize_args).await?)
1437 };
1438
1439 json!({
1440 "mode": "resume",
1441 "session_id": session_id,
1442 "restore": restore,
1443 "humanize": humanize
1444 })
1445 }
1446 other => {
1447 return Err(BrowserError::ConfigError(format!(
1448 "Invalid mode '{other}'. Use one of: capture, resume"
1449 )));
1450 }
1451 };
1452
1453 Ok(payload)
1454 }
1455
1456 async fn tool_browser_session_save(&self, args: &Value) -> Result<Value> {
1457 let session_id = Self::require_str(args, "session_id")?;
1458 let ttl_secs = args.get("ttl_secs").and_then(Value::as_u64);
1459 let file_path = args
1460 .get("file_path")
1461 .and_then(Value::as_str)
1462 .map(ToString::to_string);
1463 let include_snapshot = args
1464 .get("include_snapshot")
1465 .and_then(Value::as_bool)
1466 .unwrap_or(false);
1467
1468 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1469 self.session_runtime(&session_id).await?;
1470
1471 self.ensure_session_page(
1472 &session_id,
1473 &session_arc,
1474 &attached_browser_arc,
1475 &page_arc,
1476 nav_url_opt.as_deref(),
1477 Duration::from_secs(30),
1478 reddit_profile,
1479 )
1480 .await?;
1481
1482 let mut snapshot = {
1483 let page_guard = page_arc.lock().await;
1484 let page = page_guard.as_ref().ok_or_else(|| {
1485 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1486 })?;
1487 let saved = save_session(page).await?;
1488 drop(page_guard);
1489 saved
1490 };
1491
1492 snapshot.ttl_secs = ttl_secs;
1493 if let Some(path) = &file_path {
1494 snapshot.save_to_file(path)?;
1495 }
1496
1497 let cookie_count = snapshot.cookies.len();
1498 let local_storage_keys = snapshot.local_storage.len();
1499 let origin = snapshot.origin.clone();
1500
1501 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
1502 session.saved_snapshot = Some(snapshot.clone());
1503 }
1504
1505 let mut out = json!({
1506 "session_id": session_id,
1507 "origin": origin,
1508 "cookie_count": cookie_count,
1509 "local_storage_keys": local_storage_keys,
1510 "ttl_secs": ttl_secs,
1511 "saved_to_file": file_path
1512 });
1513
1514 if include_snapshot && let Some(obj) = out.as_object_mut() {
1515 obj.insert(
1516 "snapshot".to_string(),
1517 serde_json::to_value(&snapshot).map_err(|e| {
1518 BrowserError::ConfigError(format!("failed to serialize session snapshot: {e}"))
1519 })?,
1520 );
1521 }
1522
1523 Ok(out)
1524 }
1525
1526 async fn tool_browser_session_restore(&self, args: &Value) -> Result<Value> {
1527 let session_id = Self::require_str(args, "session_id")?;
1528 let file_path = args
1529 .get("file_path")
1530 .and_then(Value::as_str)
1531 .map(ToString::to_string);
1532 let use_saved = args
1533 .get("use_saved")
1534 .and_then(Value::as_bool)
1535 .unwrap_or(true);
1536 let navigate_to_origin = args
1537 .get("navigate_to_origin")
1538 .and_then(Value::as_bool)
1539 .unwrap_or(true);
1540
1541 let snapshot = if let Some(path) = file_path.as_deref() {
1542 SessionSnapshot::load_from_file(path)?
1543 } else if let Some(inline) = args.get("snapshot") {
1544 serde_json::from_value::<SessionSnapshot>(inline.clone()).map_err(|e| {
1545 BrowserError::ConfigError(format!("invalid inline session snapshot: {e}"))
1546 })?
1547 } else if use_saved {
1548 self.sessions
1549 .lock()
1550 .await
1551 .get(&session_id)
1552 .and_then(|s| s.saved_snapshot.clone())
1553 .ok_or_else(|| {
1554 BrowserError::ConfigError(
1555 "No saved session snapshot found for this session".to_string(),
1556 )
1557 })?
1558 } else {
1559 return Err(BrowserError::ConfigError(
1560 "No restore source provided. Set one of: file_path, snapshot, or use_saved=true"
1561 .to_string(),
1562 ));
1563 };
1564
1565 let source = if file_path.is_some() {
1566 "file"
1567 } else if args.get("snapshot").is_some() {
1568 "inline"
1569 } else {
1570 "saved"
1571 };
1572
1573 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1574 self.session_runtime(&session_id).await?;
1575
1576 self.ensure_session_page(
1577 &session_id,
1578 &session_arc,
1579 &attached_browser_arc,
1580 &page_arc,
1581 nav_url_opt.as_deref(),
1582 Duration::from_secs(30),
1583 reddit_profile,
1584 )
1585 .await?;
1586
1587 {
1588 let mut page_guard = page_arc.lock().await;
1589 let page = page_guard.as_mut().ok_or_else(|| {
1590 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1591 })?;
1592
1593 if navigate_to_origin && !snapshot.origin.is_empty() {
1594 Self::navigate_with_profile(
1595 page,
1596 &snapshot.origin,
1597 Duration::from_secs(30),
1598 reddit_profile,
1599 )
1600 .await?;
1601 }
1602
1603 restore_session(page, &snapshot).await?;
1604 drop(page_guard);
1605 }
1606
1607 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
1608 if !snapshot.origin.is_empty() {
1609 session.current_url = Some(snapshot.origin.clone());
1610 }
1611 session.saved_snapshot = Some(snapshot.clone());
1612 }
1613
1614 Ok(json!({
1615 "session_id": session_id,
1616 "source": source,
1617 "origin": snapshot.origin,
1618 "cookie_count": snapshot.cookies.len(),
1619 "local_storage_keys": snapshot.local_storage.len(),
1620 "snapshot_expired": snapshot.is_expired()
1621 }))
1622 }
1623
1624 async fn tool_browser_apply_behavior_json(&self, args: &Value) -> Result<Value> {
1625 let behavior = args.get("behavior").cloned().ok_or_else(|| {
1626 BrowserError::ConfigError("Missing required 'behavior' object".to_string())
1627 })?;
1628
1629 if !behavior.is_object() {
1630 return Err(BrowserError::ConfigError(
1631 "'behavior' must be a JSON object".to_string(),
1632 ));
1633 }
1634
1635 let adapter = PolymorphicBehaviorAdapter::from_json_value(behavior)?;
1636 let mut effective_config = BrowserConfig::default();
1637 let plan = adapter.apply(&mut effective_config);
1638 let adapter_kind = adapter.kind();
1639
1640 let session_id = args
1641 .get("session_id")
1642 .and_then(Value::as_str)
1643 .map(ToString::to_string);
1644
1645 let session_updated = if let Some(sid) = &session_id {
1646 let mut sessions = self.sessions.lock().await;
1647 let session = sessions
1648 .get_mut(sid)
1649 .ok_or_else(|| BrowserError::ConfigError(format!("Unknown session_id: {sid}")))?;
1650
1651 let cdp_fix_mode = serde_json::to_value(effective_config.cdp_fix_mode)
1652 .ok()
1653 .and_then(|value| value.as_str().map(ToString::to_string));
1654
1655 session.behavior_plan = Some(plan.clone());
1656 session.stealth_level = effective_config.stealth_level;
1657 session.cdp_fix_mode = cdp_fix_mode;
1658 session.proxy.clone_from(&effective_config.proxy);
1659
1660 #[cfg(feature = "stealth")]
1661 {
1662 session.webrtc_policy = Some(format!("{:?}", effective_config.webrtc.policy));
1663 }
1664
1665 drop(sessions);
1666 true
1667 } else {
1668 false
1669 };
1670
1671 let effective_view = json!({
1672 "headless": effective_config.headless,
1673 "stealth_level": effective_config.stealth_level,
1674 "proxy": effective_config.proxy,
1675 "window_size": effective_config.window_size,
1676 "cdp_fix_mode": effective_config.cdp_fix_mode,
1677 "args": effective_config.args
1678 });
1679
1680 Ok(json!({
1681 "adapter_kind": adapter_kind,
1682 "plan": plan,
1683 "effective_config": effective_view,
1684 "session_id": session_id,
1685 "session_updated": session_updated
1686 }))
1687 }
1688
1689 async fn tool_browser_humanize(&self, args: &Value) -> Result<Value> {
1690 let session_id = Self::require_str(args, "session_id")?;
1691 let default_level = {
1692 let sessions = self.sessions.lock().await;
1693 sessions
1694 .get(&session_id)
1695 .and_then(|s| s.behavior_plan.as_ref())
1696 .map_or(InteractionLevel::Low, |plan| match plan.interaction_level {
1697 BehaviorInteractionLevel::None => InteractionLevel::None,
1698 BehaviorInteractionLevel::Low => InteractionLevel::Low,
1699 BehaviorInteractionLevel::Medium => InteractionLevel::Medium,
1700 BehaviorInteractionLevel::High => InteractionLevel::High,
1701 })
1702 };
1703 let level = match args.get("level").and_then(Value::as_str) {
1704 Some("none") => InteractionLevel::None,
1705 Some("medium") => InteractionLevel::Medium,
1706 Some("high") => InteractionLevel::High,
1707 Some(_) => InteractionLevel::Low,
1708 None => default_level,
1709 };
1710 let viewport_width = args
1711 .get("viewport_width")
1712 .and_then(Value::as_f64)
1713 .unwrap_or(1366.0);
1714 let viewport_height = args
1715 .get("viewport_height")
1716 .and_then(Value::as_f64)
1717 .unwrap_or(768.0);
1718
1719 let (session_arc, attached_browser_arc, page_arc, nav_url_opt, reddit_profile) =
1720 self.session_runtime(&session_id).await?;
1721
1722 self.ensure_session_page(
1723 &session_id,
1724 &session_arc,
1725 &attached_browser_arc,
1726 &page_arc,
1727 nav_url_opt.as_deref(),
1728 Duration::from_secs(30),
1729 reddit_profile,
1730 )
1731 .await?;
1732
1733 {
1734 let page_guard = page_arc.lock().await;
1735 let page = page_guard.as_ref().ok_or_else(|| {
1736 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1737 })?;
1738
1739 let mut simulator = InteractionSimulator::new(level);
1740 simulator
1741 .random_interaction(page.inner(), viewport_width, viewport_height)
1742 .await?;
1743 drop(page_guard);
1744 }
1745
1746 let level_str = match level {
1747 InteractionLevel::None => "none",
1748 InteractionLevel::Low => "low",
1749 InteractionLevel::Medium => "medium",
1750 InteractionLevel::High => "high",
1751 };
1752
1753 Ok(json!({
1754 "session_id": session_id,
1755 "level": level_str,
1756 "viewport_width": viewport_width,
1757 "viewport_height": viewport_height,
1758 "applied": true
1759 }))
1760 }
1761
1762 async fn tool_browser_query(&self, args: &Value) -> Result<Value> {
1763 let session_id = Self::require_str(args, "session_id")?;
1764 let url = Self::require_str(args, "url")?;
1765 let selector = Self::require_str(args, "selector")?;
1766 let limit = usize::try_from(
1767 args.get("limit")
1768 .and_then(serde_json::Value::as_u64)
1769 .unwrap_or(50),
1770 )
1771 .unwrap_or(50);
1772 let timeout_secs = args
1773 .get("timeout_secs")
1774 .and_then(serde_json::Value::as_f64)
1775 .unwrap_or(30.0);
1776
1777 let fields: Option<Vec<(String, Option<String>)>> =
1779 args.get("fields").and_then(|v| v.as_object()).map(|obj| {
1780 obj.iter()
1781 .map(|(k, v)| {
1782 let attr = v
1783 .get("attr")
1784 .and_then(serde_json::Value::as_str)
1785 .map(ToString::to_string);
1786 (k.clone(), attr)
1787 })
1788 .collect()
1789 });
1790
1791 let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
1792 self.session_runtime(&session_id).await?;
1793 self.ensure_session_page(
1794 &session_id,
1795 &session_arc,
1796 &attached_browser_arc,
1797 &page_arc,
1798 None,
1799 Duration::from_secs_f64(timeout_secs),
1800 reddit_profile,
1801 )
1802 .await?;
1803
1804 let mut page_guard = page_arc.lock().await;
1805 let page = page_guard.as_mut().ok_or_else(|| {
1806 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1807 })?;
1808
1809 Self::navigate_with_profile(
1810 page,
1811 &url,
1812 Duration::from_secs_f64(timeout_secs),
1813 reddit_profile,
1814 )
1815 .await?;
1816
1817 let all_nodes = page.query_selector_all(&selector).await?;
1818 let nodes = all_nodes.get(..limit).unwrap_or(&all_nodes);
1819 let mut results: Vec<Value> = Vec::with_capacity(nodes.len());
1820 if let Some(ref field_defs) = fields {
1821 for node in nodes {
1822 let mut obj = serde_json::Map::new();
1823 for (field_name, attr_name) in field_defs {
1824 let val = if let Some(attr) = attr_name {
1825 node.attr(attr)
1826 .await
1827 .map_or(Value::Null, |opt| opt.map_or(Value::Null, Value::String))
1828 } else {
1829 node.text_content().await.map_or(Value::Null, Value::String)
1830 };
1831 obj.insert(field_name.clone(), val);
1832 }
1833 results.push(Value::Object(obj));
1834 }
1835 } else {
1836 for node in nodes {
1837 let text = node.text_content().await.unwrap_or_default();
1838 results.push(Value::String(text));
1839 }
1840 }
1841 drop(page_guard);
1842 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
1843 session.current_url = Some(url.clone());
1844 }
1845
1846 Ok(json!({
1847 "url": url,
1848 "selector": selector,
1849 "count": results.len(),
1850 "results": results
1851 }))
1852 }
1853
1854 async fn tool_browser_extract(&self, args: &Value) -> Result<Value> {
1855 let session_id = Self::require_str(args, "session_id")?;
1856 let url = Self::require_str(args, "url")?;
1857 let root_selector = Self::require_str(args, "root_selector")?;
1858 let timeout_secs = args
1859 .get("timeout_secs")
1860 .and_then(serde_json::Value::as_f64)
1861 .unwrap_or(30.0);
1862
1863 let schema_obj = args
1865 .get("schema")
1866 .and_then(|v| v.as_object())
1867 .ok_or_else(|| {
1868 BrowserError::ConfigError("Missing or non-object 'schema' argument".to_string())
1869 })?;
1870
1871 let schema: Vec<(String, ExtractFieldDef)> = schema_obj
1872 .iter()
1873 .filter_map(|(name, spec)| {
1874 let selector = spec
1875 .get("selector")
1876 .and_then(serde_json::Value::as_str)
1877 .map(ToString::to_string)?;
1878 let attr = spec
1879 .get("attr")
1880 .and_then(serde_json::Value::as_str)
1881 .map(ToString::to_string);
1882 let required = spec
1883 .get("required")
1884 .and_then(serde_json::Value::as_bool)
1885 .unwrap_or(false);
1886 Some((
1887 name.clone(),
1888 ExtractFieldDef {
1889 selector,
1890 attr,
1891 required,
1892 },
1893 ))
1894 })
1895 .collect();
1896
1897 let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
1898 self.session_runtime(&session_id).await?;
1899 self.ensure_session_page(
1900 &session_id,
1901 &session_arc,
1902 &attached_browser_arc,
1903 &page_arc,
1904 None,
1905 Duration::from_secs_f64(timeout_secs),
1906 reddit_profile,
1907 )
1908 .await?;
1909
1910 let mut page_guard = page_arc.lock().await;
1911 let page = page_guard.as_mut().ok_or_else(|| {
1912 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
1913 })?;
1914
1915 Self::navigate_with_profile(
1916 page,
1917 &url,
1918 Duration::from_secs_f64(timeout_secs),
1919 reddit_profile,
1920 )
1921 .await?;
1922
1923 let roots = page.query_selector_all(&root_selector).await?;
1924 let mut results: Vec<Value> = Vec::with_capacity(roots.len());
1925 for root in &roots {
1926 if let Some(obj) = Self::extract_record(root, &schema).await {
1927 results.push(Value::Object(obj));
1928 }
1929 }
1930 drop(page_guard);
1931 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
1932 session.current_url = Some(url.clone());
1933 }
1934
1935 Ok(json!({
1936 "url": url,
1937 "root_selector": root_selector,
1938 "count": results.len(),
1939 "results": results
1940 }))
1941 }
1942
/// Navigates the session's page to `url`, takes the first element matching
/// `reference_selector`, and returns elements that `find_similar` reports for it.
///
/// Arguments read from `args`:
/// - `session_id`, `url`, `reference_selector` (strings, required)
/// - `threshold` (number, optional, default `SimilarityConfig::DEFAULT_THRESHOLD`)
/// - `max_results` (unsigned integer, optional, default `10`)
/// - `timeout_secs` (number, optional, default `30.0`)
///
/// When the reference selector matches nothing, an `Ok` payload with
/// `isError: true` is returned and the session's `current_url` is left
/// untouched. Otherwise each match reports its score, text content, and
/// `outer_html_snippet` — the first 200 characters of the node's
/// `inner_html()` — and `current_url` is set to `url`.
///
/// # Errors
/// Returns `BrowserError::ConfigError` when `timeout_secs` cannot be
/// represented as a `Duration` (negative or out of range) or when the page
/// slot is empty; propagates errors from session lookup,
/// `ensure_session_page`, navigation, the selector query, fingerprinting and
/// the similarity search.
#[cfg(feature = "similarity")]
async fn tool_browser_find_similar(&self, args: &Value) -> Result<Value> {
    use crate::similarity::SimilarityConfig;

    let session_id = Self::require_str(args, "session_id")?;
    let url = Self::require_str(args, "url")?;
    let reference_selector = Self::require_str(args, "reference_selector")?;
    // JSON numbers arrive as f64; narrowing to the config's f32 is intended.
    #[allow(clippy::cast_possible_truncation)]
    let threshold = args
        .get("threshold")
        .and_then(serde_json::Value::as_f64)
        .map_or(SimilarityConfig::DEFAULT_THRESHOLD, |v| v as f32);
    let max_results = usize::try_from(
        args.get("max_results")
            .and_then(serde_json::Value::as_u64)
            .unwrap_or(10),
    )
    .unwrap_or(10);
    let timeout_secs = args
        .get("timeout_secs")
        .and_then(serde_json::Value::as_f64)
        .unwrap_or(30.0);
    // `Duration::from_secs_f64` panics on negative or overflowing input, and
    // this value comes straight from the caller's JSON — reject it instead.
    let timeout = Duration::try_from_secs_f64(timeout_secs).map_err(|e| {
        BrowserError::ConfigError(format!("Invalid timeout_secs {timeout_secs}: {e}"))
    })?;

    let config = SimilarityConfig {
        threshold,
        max_results,
    };

    let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
        self.session_runtime(&session_id).await?;
    self.ensure_session_page(
        &session_id,
        &session_arc,
        &attached_browser_arc,
        &page_arc,
        None,
        timeout,
        reddit_profile,
    )
    .await?;

    let mut page_guard = page_arc.lock().await;
    let page = page_guard.as_mut().ok_or_else(|| {
        BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
    })?;

    Self::navigate_with_profile(page, &url, timeout, reddit_profile).await?;

    let refs = page.query_selector_all(&reference_selector).await?;
    // Only the first match serves as the reference element.
    let Some(reference) = refs.into_iter().next() else {
        return Ok(json!({
            "isError": true,
            "error": format!("Reference selector matched no elements: {reference_selector}")
        }));
    };

    let ref_fp = reference.fingerprint().await?;
    let matches = page.find_similar(&reference, config).await?;

    let mut match_results: Vec<Value> = Vec::with_capacity(matches.len());
    for m in &matches {
        let text = m.node.text_content().await.unwrap_or_default();
        let snippet = m.node.inner_html().await.unwrap_or_default();
        // Truncate by characters so a multi-byte character is never split.
        let snippet: String = snippet.chars().take(200).collect();
        match_results.push(json!({
            "score": m.score,
            "text": text,
            "outer_html_snippet": snippet
        }));
    }
    drop(page_guard);
    if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
        session.current_url = Some(url.clone());
    }

    Ok(json!({
        "url": url,
        "reference": {
            "tag": ref_fp.tag,
            "classes": ref_fp.classes,
            "attr_names": ref_fp.attr_names,
            "depth": ref_fp.depth
        },
        "count": match_results.len(),
        "matches": match_results
    }))
}
2037
2038 async fn tool_browser_warmup(&self, args: &Value) -> Result<Value> {
2039 use crate::page::{WarmupOptions, WarmupWait};
2040
2041 let session_id = Self::require_str(args, "session_id")?;
2042 let url = Self::require_str(args, "url")?;
2043 let wait = match args
2044 .get("wait")
2045 .and_then(|v| v.as_str())
2046 .unwrap_or("dom_content_loaded")
2047 {
2048 "network_idle" => WarmupWait::NetworkIdle,
2049 _ => WarmupWait::DomContentLoaded,
2050 };
2051 let timeout_ms = args
2052 .get("timeout_ms")
2053 .and_then(serde_json::Value::as_u64)
2054 .unwrap_or(30_000);
2055 let stabilize_ms = args
2056 .get("stabilize_ms")
2057 .and_then(serde_json::Value::as_u64)
2058 .unwrap_or(0);
2059
2060 let (session_arc, attached_browser_arc, page_arc, _, _) =
2061 self.session_runtime(&session_id).await?;
2062 self.ensure_session_page(
2063 &session_id,
2064 &session_arc,
2065 &attached_browser_arc,
2066 &page_arc,
2067 None,
2068 Duration::from_millis(timeout_ms),
2069 false,
2070 )
2071 .await?;
2072
2073 let mut page_guard = page_arc.lock().await;
2074 let page = page_guard.as_mut().ok_or_else(|| {
2075 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
2076 })?;
2077
2078 let report = page
2079 .warmup(WarmupOptions {
2080 url,
2081 wait,
2082 timeout_ms,
2083 stabilize_ms,
2084 })
2085 .await?;
2086 drop(page_guard);
2087
2088 Ok(json!({
2089 "session_id": session_id,
2090 "url": report.url,
2091 "elapsed_ms": report.elapsed_ms,
2092 "status_code": report.status_code,
2093 "title": report.title,
2094 "stabilized": report.stabilized
2095 }))
2096 }
2097
2098 async fn tool_browser_refresh(&self, args: &Value) -> Result<Value> {
2099 use crate::page::{RefreshOptions, WarmupWait};
2100
2101 let session_id = Self::require_str(args, "session_id")?;
2102 let wait = match args
2103 .get("wait")
2104 .and_then(|v| v.as_str())
2105 .unwrap_or("dom_content_loaded")
2106 {
2107 "network_idle" => WarmupWait::NetworkIdle,
2108 _ => WarmupWait::DomContentLoaded,
2109 };
2110 let timeout_ms = args
2111 .get("timeout_ms")
2112 .and_then(serde_json::Value::as_u64)
2113 .unwrap_or(30_000);
2114 let reset_connection = args
2115 .get("reset_connection")
2116 .and_then(serde_json::Value::as_bool)
2117 .unwrap_or(false);
2118
2119 let (session_arc, attached_browser_arc, page_arc, _, _) =
2120 self.session_runtime(&session_id).await?;
2121 self.ensure_session_page(
2122 &session_id,
2123 &session_arc,
2124 &attached_browser_arc,
2125 &page_arc,
2126 None,
2127 Duration::from_millis(timeout_ms),
2128 false,
2129 )
2130 .await?;
2131
2132 let mut page_guard = page_arc.lock().await;
2133 let page = page_guard.as_mut().ok_or_else(|| {
2134 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
2135 })?;
2136
2137 let report = page
2138 .refresh(RefreshOptions {
2139 wait,
2140 timeout_ms,
2141 reset_connection,
2142 })
2143 .await?;
2144 drop(page_guard);
2145
2146 Ok(json!({
2147 "session_id": session_id,
2148 "url": report.url,
2149 "elapsed_ms": report.elapsed_ms,
2150 "status_code": report.status_code
2151 }))
2152 }
2153
2154 async fn tool_browser_release(&self, args: &Value) -> Result<Value> {
2155 let session_id = Self::require_str(args, "session_id")?;
2156
2157 let (session_arc, attached_browser_arc, attached_handler_task_arc, page_arc) = {
2159 let mut sessions = self.sessions.lock().await;
2160 let removed = sessions.remove(&session_id).ok_or_else(|| {
2161 BrowserError::ConfigError(format!("Unknown session: {session_id}"))
2162 })?;
2163 drop(sessions);
2164 (
2165 removed.handle,
2166 removed.attached_browser,
2167 removed.attached_handler_task,
2168 removed.page,
2169 )
2170 };
2171
2172 let handle = session_arc.lock().await.take();
2174 if let Some(h) = handle {
2175 h.release().await;
2176 }
2177
2178 let attached_browser = attached_browser_arc.lock().await.take();
2179 if let Some(mut browser) = attached_browser {
2180 let close_timeout = Duration::from_secs(5);
2181 match tokio::time::timeout(close_timeout, browser.close()).await {
2182 Ok(Ok(_)) => {}
2183 Ok(Err(error)) => {
2184 tracing::warn!(%session_id, %error, "attached browser close failed during release");
2185 }
2186 Err(_) => {
2187 tracing::warn!(%session_id, "attached browser close timed out during release");
2188 }
2189 }
2190 }
2191
2192 let attached_handler_task = attached_handler_task_arc.lock().await.take();
2193 if let Some(task) = attached_handler_task {
2194 task.abort();
2195 }
2196
2197 let page = page_arc.lock().await.take();
2198 if let Some(page) = page {
2199 page.close().await.ok();
2200 }
2201
2202 info!(%session_id, "MCP session released");
2203 Ok(json!({ "released": true, "session_id": session_id }))
2204 }
2205
/// Runs stealth validation targets one after another and returns their
/// results serialized as JSON.
///
/// Arguments read from `args`:
/// - `tier1_only` (boolean, default `false`): use `ValidationTarget::tier1()`
///   and ignore `targets`
/// - `targets` (array of names): entries that are not strings or not one of
///   the known names are dropped; when the key is absent or not an array the
///   tier-1 set is used
/// - `timeout_secs` (unsigned integer, default `30`): budget per target
///
/// A target that exceeds its budget is recorded as a failed result rather
/// than aborting the run.
///
/// # Errors
///
/// Fails only when the collected results cannot be serialized.
#[cfg(feature = "stealth")]
async fn tool_browser_validate_stealth(&self, args: &Value) -> Result<Value> {
    use crate::validation::{ValidationResult, ValidationSuite, ValidationTarget};

    let tier1_only = args
        .get("tier1_only")
        .and_then(Value::as_bool)
        .unwrap_or(false);
    let timeout_secs = args
        .get("timeout_secs")
        .and_then(Value::as_u64)
        .unwrap_or(30);

    // `None` means "fall back to tier 1", either because it was requested
    // explicitly or because no usable `targets` array was supplied.
    let requested = if tier1_only {
        None
    } else {
        args.get("targets").and_then(Value::as_array)
    };
    let targets: Vec<ValidationTarget> = match requested {
        None => ValidationTarget::tier1().to_vec(),
        Some(names) => names
            .iter()
            .filter_map(Value::as_str)
            .filter_map(|name| match name {
                "creepjs" => Some(ValidationTarget::CreepJs),
                "browserscan" => Some(ValidationTarget::BrowserScan),
                "fingerprint_js" => Some(ValidationTarget::FingerprintJs),
                "kasada" => Some(ValidationTarget::Kasada),
                "cloudflare" => Some(ValidationTarget::Cloudflare),
                "akamai" => Some(ValidationTarget::Akamai),
                "data_dome" => Some(ValidationTarget::DataDome),
                "perimeter_x" => Some(ValidationTarget::PerimeterX),
                _ => None,
            })
            .collect(),
    };

    let per_target = Duration::from_secs(timeout_secs);
    let mut results = Vec::with_capacity(targets.len());
    for target in targets {
        let outcome =
            tokio::time::timeout(per_target, ValidationSuite::run_one(&self.pool, target))
                .await
                .unwrap_or_else(|_| {
                    ValidationResult::failed(
                        target,
                        &format!("validation timed out after {timeout_secs}s"),
                    )
                });
        results.push(outcome);
    }

    match serde_json::to_value(&results) {
        Ok(value) => Ok(value),
        Err(e) => Err(BrowserError::ConfigError(format!(
            "failed to serialize results: {e}"
        ))),
    }
}
2265
2266 fn tool_pool_stats(&self) -> Value {
2267 let stats = self.pool.stats();
2268 json!({
2269 "active": stats.active,
2270 "max": stats.max,
2271 "available": stats.available
2272 })
2273 }
2274
2275 async fn handle_resources_list(&self, id: Value) -> JsonRpcResponse {
2278 let resources: Vec<Value> = self
2279 .sessions
2280 .lock()
2281 .await
2282 .keys()
2283 .map(|sid| {
2284 json!({
2285 "uri": format!("browser://session/{sid}"),
2286 "name": format!("Browser session {sid}"),
2287 "mimeType": "application/json"
2288 })
2289 })
2290 .collect();
2291
2292 JsonRpcResponse::ok(id, json!({ "resources": resources }))
2293 }
2294
2295 async fn handle_resources_read(&self, id: Value, params: Value) -> JsonRpcResponse {
2298 let uri = match params.get("uri").and_then(|v| v.as_str()) {
2299 Some(u) => u.to_string(),
2300 None => return JsonRpcResponse::err(id, -32602, "Missing 'uri'"),
2301 };
2302
2303 let session_id = uri
2305 .strip_prefix("browser://session/")
2306 .unwrap_or("")
2307 .to_string();
2308
2309 let session_config: Option<Value> = {
2311 let sessions = self.sessions.lock().await;
2312 sessions.get(&session_id).map(|s| {
2313 json!({
2314 "stealth_level": format!("{:?}", s.stealth_level).to_lowercase(),
2315 "tls_profile": s.tls_profile,
2316 "webrtc_policy": s.webrtc_policy,
2317 "cdp_fix_mode": s.cdp_fix_mode,
2318 "proxy": s.proxy,
2319 "target_profile": s.target_profile,
2320 "current_url": s.current_url,
2321 "has_saved_snapshot": s.saved_snapshot.is_some(),
2322 "attach_endpoint": s.attach_endpoint,
2323 "has_behavior_plan": s.behavior_plan.is_some(),
2324 "behavior_plan": s.behavior_plan.as_ref()
2325 })
2326 })
2327 };
2328
2329 if let Some(config) = session_config {
2330 let pool_stats = self.pool.stats();
2331 JsonRpcResponse::ok(
2332 id,
2333 json!({
2334 "contents": [{
2335 "uri": uri,
2336 "mimeType": "application/json",
2337 "text": serde_json::to_string_pretty(&json!({
2338 "session_id": session_id,
2339 "config": config,
2340 "pool_active": pool_stats.active,
2341 "pool_max": pool_stats.max
2342 })).unwrap_or_default()
2343 }]
2344 }),
2345 )
2346 } else {
2347 JsonRpcResponse::err(id, -32002, format!("Resource not found: {uri}"))
2348 }
2349 }
2350
2351 async fn session_runtime(
2354 &self,
2355 session_id: &str,
2356 ) -> Result<(
2357 Arc<Mutex<Option<BrowserHandle>>>,
2358 Arc<Mutex<Option<Browser>>>,
2359 Arc<Mutex<Option<crate::page::PageHandle>>>,
2360 Option<String>,
2361 bool,
2362 )> {
2363 self.sessions
2364 .lock()
2365 .await
2366 .get(session_id)
2367 .map(|s| {
2368 (
2369 s.handle.clone(),
2370 s.attached_browser.clone(),
2371 s.page.clone(),
2372 s.current_url.clone(),
2373 s.target_profile == "reddit",
2374 )
2375 })
2376 .ok_or_else(|| BrowserError::ConfigError(format!("Unknown session: {session_id}")))
2377 }
2378
2379 #[expect(
2380 clippy::too_many_arguments,
2381 reason = "session runtime handles and bootstrap options are passed explicitly for clarity"
2382 )]
2383 async fn ensure_session_page(
2384 &self,
2385 session_id: &str,
2386 handle_arc: &Arc<Mutex<Option<BrowserHandle>>>,
2387 attached_browser_arc: &Arc<Mutex<Option<Browser>>>,
2388 page_arc: &Arc<Mutex<Option<crate::page::PageHandle>>>,
2389 current_url: Option<&str>,
2390 timeout: Duration,
2391 reddit_profile: bool,
2392 ) -> Result<()> {
2393 let mut page_guard = page_arc.lock().await;
2394 let created = if page_guard.is_none() {
2395 let new_page =
2396 Self::create_session_page(session_id, handle_arc, attached_browser_arc).await?;
2397
2398 *page_guard = Some(new_page);
2399 true
2400 } else {
2401 false
2402 };
2403
2404 if created
2405 && let Some(url) = current_url
2406 && let Some(page) = page_guard.as_mut()
2407 {
2408 Self::navigate_with_profile(page, url, timeout, reddit_profile).await?;
2409 }
2410
2411 drop(page_guard);
2412
2413 Ok(())
2414 }
2415
2416 async fn create_session_page(
2417 session_id: &str,
2418 handle_arc: &Arc<Mutex<Option<BrowserHandle>>>,
2419 attached_browser_arc: &Arc<Mutex<Option<Browser>>>,
2420 ) -> Result<crate::page::PageHandle> {
2421 let handle_guard = handle_arc.lock().await;
2422 if let Some(handle) = handle_guard.as_ref() {
2423 let browser = handle.browser().ok_or_else(|| {
2424 BrowserError::ConfigError(format!("Browser handle invalid: {session_id}"))
2425 })?;
2426 let page = browser.new_page().await?;
2427 drop(handle_guard);
2428 return Ok(page);
2429 }
2430 drop(handle_guard);
2431
2432 let browser_guard = attached_browser_arc.lock().await;
2433 let browser = browser_guard.as_ref().ok_or_else(|| {
2434 BrowserError::ConfigError(format!("Session already released: {session_id}"))
2435 })?;
2436 let raw_page =
2437 browser
2438 .new_page("about:blank")
2439 .await
2440 .map_err(|e| BrowserError::CdpError {
2441 operation: "Browser.newPage".to_string(),
2442 message: e.to_string(),
2443 })?;
2444 drop(browser_guard);
2445
2446 Ok(crate::page::PageHandle::new(
2447 raw_page,
2448 Duration::from_secs(30),
2449 ))
2450 }
2451
2452 async fn navigate_with_profile(
2453 page: &mut crate::page::PageHandle,
2454 url: &str,
2455 timeout: Duration,
2456 reddit_profile: bool,
2457 ) -> Result<(bool, bool)> {
2458 let wait_until = if reddit_profile {
2459 WaitUntil::DomContentLoaded
2460 } else {
2461 WaitUntil::Selector("body".to_string())
2462 };
2463
2464 page.navigate(url, wait_until, timeout).await?;
2465
2466 if reddit_profile || url.contains("reddit.com") {
2467 return Self::wait_for_reddit_challenge(page, timeout).await;
2468 }
2469
2470 Ok((false, true))
2471 }
2472
2473 async fn wait_for_reddit_challenge(
2474 page: &crate::page::PageHandle,
2475 timeout: Duration,
2476 ) -> Result<(bool, bool)> {
2477 let max_wait = timeout.min(Duration::from_secs(15));
2478 let mut elapsed = Duration::ZERO;
2479 let interval = Duration::from_millis(500);
2480 let mut challenge_seen = false;
2481
2482 while elapsed <= max_wait {
2483 let challenge_state = page
2484 .eval::<Value>(
2485 r#"(() => {
2486 const title = (document.title || "").toLowerCase();
2487 const href = (location.href || "").toLowerCase();
2488 const body = (document.body?.innerText || "").toLowerCase();
2489 const challenge =
2490 title.includes("verification") ||
2491 title.includes("just a moment") ||
2492 href.includes("/js_challenge") ||
2493 body.includes("please wait for verification") ||
2494 body.includes("verify you are human");
2495 return {
2496 challenge,
2497 ready: document.readyState === "complete"
2498 };
2499 })()"#,
2500 )
2501 .await
2502 .unwrap_or_else(|_| json!({"challenge": false, "ready": true}));
2503
2504 let is_challenge = challenge_state
2505 .get("challenge")
2506 .and_then(Value::as_bool)
2507 .unwrap_or(false);
2508 let ready = challenge_state
2509 .get("ready")
2510 .and_then(Value::as_bool)
2511 .unwrap_or(true);
2512
2513 challenge_seen |= is_challenge;
2514 if !is_challenge && ready {
2515 return Ok((challenge_seen, true));
2516 }
2517
2518 sleep(interval).await;
2519 elapsed += interval;
2520 }
2521
2522 Ok((challenge_seen, false))
2523 }
2524
/// Looks up a session and returns a clone of its pooled browser handle slot
/// together with the lower-cased `Debug` rendering of its stealth level.
///
/// # Errors
///
/// Fails with a configuration error when `session_id` is not registered.
#[cfg(feature = "stealth")]
async fn session_handle_and_stealth(
    &self,
    session_id: &str,
) -> Result<(Arc<Mutex<Option<BrowserHandle>>>, String)> {
    let sessions = self.sessions.lock().await;
    let found = sessions.get(session_id).map(|session| {
        let level = format!("{:?}", session.stealth_level).to_lowercase();
        (Arc::clone(&session.handle), level)
    });
    drop(sessions);

    match found {
        Some(pair) => Ok(pair),
        None => Err(BrowserError::ConfigError(format!(
            "Unknown session: {session_id}"
        ))),
    }
}
2542
2543 async fn tool_browser_extract_with_fallback(&self, args: &Value) -> Result<Value> {
2547 let session_id = Self::require_str(args, "session_id")?;
2548 let url = Self::require_str(args, "url")?;
2549 let timeout_secs = args
2550 .get("timeout_secs")
2551 .and_then(serde_json::Value::as_f64)
2552 .unwrap_or(30.0);
2553 let selectors = Self::parse_root_selectors(args)?;
2554 let schema = Self::parse_extract_schema(args)?;
2555
2556 let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
2557 self.session_runtime(&session_id).await?;
2558 self.ensure_session_page(
2559 &session_id,
2560 &session_arc,
2561 &attached_browser_arc,
2562 &page_arc,
2563 None,
2564 Duration::from_secs_f64(timeout_secs),
2565 reddit_profile,
2566 )
2567 .await?;
2568
2569 let mut page_guard = page_arc.lock().await;
2570 let page = page_guard.as_mut().ok_or_else(|| {
2571 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
2572 })?;
2573
2574 Self::navigate_with_profile(
2575 page,
2576 &url,
2577 Duration::from_secs_f64(timeout_secs),
2578 reddit_profile,
2579 )
2580 .await?;
2581
2582 let mut matched_selector = String::new();
2583 let mut results: Vec<Value> = vec![];
2584
2585 for selector in &selectors {
2586 let roots = page.query_selector_all(selector).await?;
2587 if roots.is_empty() {
2588 continue;
2589 }
2590
2591 let mut selector_results: Vec<Value> = Vec::with_capacity(roots.len());
2592 for root in &roots {
2593 if let Some(obj) = Self::extract_record(root, &schema).await {
2594 selector_results.push(Value::Object(obj));
2595 }
2596 }
2597
2598 if selector_results.is_empty() {
2599 continue;
2600 }
2601
2602 matched_selector = selector.clone();
2603 results = selector_results;
2604 break;
2605 }
2606 drop(page_guard);
2607 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
2608 session.current_url = Some(url.clone());
2609 }
2610
2611 Ok(json!({
2612 "url": url,
2613 "matched_selector": matched_selector,
2614 "tried_selectors": selectors,
2615 "count": results.len(),
2616 "results": results
2617 }))
2618 }
2619
2620 async fn tool_browser_extract_resilient(&self, args: &Value) -> Result<Value> {
2625 let session_id = Self::require_str(args, "session_id")?;
2626 let url = Self::require_str(args, "url")?;
2627 let root_selector = Self::require_str(args, "root_selector")?;
2628 let timeout_secs = args
2629 .get("timeout_secs")
2630 .and_then(serde_json::Value::as_f64)
2631 .unwrap_or(30.0);
2632 let schema = Self::parse_extract_schema(args)?;
2633
2634 let (session_arc, attached_browser_arc, page_arc, _, reddit_profile) =
2635 self.session_runtime(&session_id).await?;
2636 self.ensure_session_page(
2637 &session_id,
2638 &session_arc,
2639 &attached_browser_arc,
2640 &page_arc,
2641 None,
2642 Duration::from_secs_f64(timeout_secs),
2643 reddit_profile,
2644 )
2645 .await?;
2646
2647 let mut page_guard = page_arc.lock().await;
2648 let page = page_guard.as_mut().ok_or_else(|| {
2649 BrowserError::ConfigError(format!("Session page unavailable: {session_id}"))
2650 })?;
2651
2652 Self::navigate_with_profile(
2653 page,
2654 &url,
2655 Duration::from_secs_f64(timeout_secs),
2656 reddit_profile,
2657 )
2658 .await?;
2659
2660 let roots = page.query_selector_all(&root_selector).await?;
2661 let mut results: Vec<Value> = Vec::with_capacity(roots.len());
2664 let mut skipped: usize = 0;
2665 for root in &roots {
2666 match Self::extract_record(root, &schema).await {
2667 Some(obj) => results.push(Value::Object(obj)),
2668 None => skipped += 1,
2669 }
2670 }
2671 drop(page_guard);
2672 if let Some(session) = self.sessions.lock().await.get_mut(&session_id) {
2673 session.current_url = Some(url.clone());
2674 }
2675
2676 Ok(json!({
2677 "url": url,
2678 "root_selector": root_selector,
2679 "count": results.len(),
2680 "skipped": skipped,
2681 "results": results
2682 }))
2683 }
2684
2685 async fn extract_record(
2686 root: &crate::page::NodeHandle,
2687 schema: &[(String, ExtractFieldDef)],
2688 ) -> Option<serde_json::Map<String, Value>> {
2689 let mut obj = serde_json::Map::new();
2690 for (field_name, def) in schema {
2691 let Ok(children) = root.children_matching(&def.selector).await else {
2692 if def.required {
2693 return None;
2694 }
2695 obj.insert(field_name.clone(), Value::Null);
2696 continue;
2697 };
2698 let val = match children.into_iter().next() {
2699 None => {
2700 if def.required {
2701 return None;
2702 }
2703 Value::Null
2704 }
2705 Some(node) => {
2706 if let Some(attr) = &def.attr {
2707 node.attr(attr)
2708 .await
2709 .map_or(Value::Null, |opt| opt.map_or(Value::Null, Value::String))
2710 } else {
2711 node.text_content().await.map_or(Value::Null, Value::String)
2712 }
2713 }
2714 };
2715 obj.insert(field_name.clone(), val);
2716 }
2717 Some(obj)
2718 }
2719
2720 fn require_str(args: &Value, key: &str) -> Result<String> {
2721 args.get(key)
2722 .and_then(|v| v.as_str())
2723 .map(ToString::to_string)
2724 .ok_or_else(|| BrowserError::ConfigError(format!("Missing required argument: {key}")))
2725 }
2726
2727 fn parse_acquisition_mode(mode: &str) -> Result<AcquisitionMode> {
2728 match mode {
2729 "fast" => Ok(AcquisitionMode::Fast),
2730 "resilient" => Ok(AcquisitionMode::Resilient),
2731 "hostile" => Ok(AcquisitionMode::Hostile),
2732 "investigate" => Ok(AcquisitionMode::Investigate),
2733 other => Err(BrowserError::ConfigError(format!(
2734 "Invalid mode '{other}'. Use one of: fast, resilient, hostile, investigate"
2735 ))),
2736 }
2737 }
2738
2739 fn parse_acquisition_request(args: &Value) -> Result<AcquisitionRequest> {
2740 const MAX_ACQUISITION_TIMEOUT_SECS: f64 = 86_400.0;
2741
2742 let url = Self::require_str(args, "url")?;
2743 let mode_raw = Self::require_str(args, "mode")?;
2744 let mode = Self::parse_acquisition_mode(&mode_raw)?;
2745
2746 let wait_for_selector = args
2747 .get("wait_for_selector")
2748 .or_else(|| args.get("selector_wait"))
2749 .and_then(Value::as_str)
2750 .map(ToString::to_string);
2751
2752 let extraction_js = args
2753 .get("extraction_js")
2754 .and_then(Value::as_str)
2755 .map(ToString::to_string);
2756
2757 let browserbase_enabled = args
2758 .get("browserbase_enabled")
2759 .or_else(|| args.get("use_browserbase"))
2760 .and_then(Value::as_bool)
2761 .unwrap_or(false);
2762
2763 let total_timeout = match args.get("total_timeout_secs").and_then(Value::as_f64) {
2764 Some(value)
2765 if value.is_finite() && value > 0.0 && value <= MAX_ACQUISITION_TIMEOUT_SECS =>
2766 {
2767 Duration::from_secs_f64(value)
2768 }
2769 Some(_) => {
2770 return Err(BrowserError::ConfigError(format!(
2771 "total_timeout_secs must be a positive finite number <= {MAX_ACQUISITION_TIMEOUT_SECS}"
2772 )));
2773 }
2774 None => AcquisitionRequest::default().total_timeout,
2775 };
2776
2777 Ok(AcquisitionRequest {
2778 url,
2779 mode,
2780 wait_for_selector,
2781 extraction_js,
2782 total_timeout,
2783 browserbase_enabled,
2784 ..AcquisitionRequest::default()
2785 })
2786 }
2787
2788 fn acquisition_result_to_tool_output(result: &AcquisitionResult) -> Value {
2789 let strategy_used = serde_json::to_value(result.strategy_used).unwrap_or(Value::Null);
2790 let attempted = serde_json::to_value(&result.attempted).unwrap_or(Value::Array(Vec::new()));
2791 let failures = serde_json::to_value(&result.failures).unwrap_or(Value::Array(Vec::new()));
2792
2793 json!({
2794 "success": result.success,
2795 "strategy_used": strategy_used,
2796 "final_url": result.final_url,
2797 "status_code": result.status_code,
2798 "extracted": result.extracted,
2799 "html_excerpt": result.html_excerpt,
2800 "diagnostics": {
2801 "attempted": attempted,
2802 "timed_out": result.timed_out,
2803 "failure_count": result.failures.len(),
2804 "failures": failures
2805 }
2806 })
2807 }
2808
2809 fn parse_root_selectors(args: &Value) -> Result<Vec<String>> {
2810 let selectors: Vec<String> = args
2811 .get("root_selectors")
2812 .and_then(Value::as_array)
2813 .ok_or_else(|| {
2814 BrowserError::ConfigError(
2815 "Missing or non-array 'root_selectors' argument".to_string(),
2816 )
2817 })?
2818 .iter()
2819 .filter_map(|v| v.as_str().map(str::to_string))
2820 .collect();
2821
2822 if selectors.is_empty() {
2823 return Err(BrowserError::ConfigError(
2824 "root_selectors must contain at least one entry".to_string(),
2825 ));
2826 }
2827 Ok(selectors)
2828 }
2829
2830 fn parse_extract_schema(args: &Value) -> Result<Vec<(String, ExtractFieldDef)>> {
2831 let schema_obj = args
2832 .get("schema")
2833 .and_then(Value::as_object)
2834 .ok_or_else(|| {
2835 BrowserError::ConfigError("Missing or non-object 'schema' argument".to_string())
2836 })?;
2837
2838 Ok(schema_obj
2839 .iter()
2840 .filter_map(|(name, spec)| {
2841 let selector = spec
2842 .get("selector")
2843 .and_then(Value::as_str)
2844 .map(ToString::to_string)?;
2845 let attr = spec
2846 .get("attr")
2847 .and_then(Value::as_str)
2848 .map(ToString::to_string);
2849 let required = spec
2850 .get("required")
2851 .and_then(Value::as_bool)
2852 .unwrap_or(false);
2853 Some((
2854 name.clone(),
2855 ExtractFieldDef {
2856 selector,
2857 attr,
2858 required,
2859 },
2860 ))
2861 })
2862 .collect())
2863 }
2864}
2865
/// Interprets a raw flag value: `true`, `1` and `yes` (in any letter case)
/// mean enabled; everything else, including the empty string and values with
/// surrounding whitespace, means disabled.
fn mcp_enabled_from(value: &str) -> bool {
    ["true", "1", "yes"]
        .iter()
        .any(|accepted| value.eq_ignore_ascii_case(accepted))
}
2871
2872pub fn is_mcp_enabled() -> bool {
2877 mcp_enabled_from(&std::env::var("STYGIAN_MCP_ENABLED").unwrap_or_default())
2878}
2879
2880#[cfg(test)]
2881mod tests {
2882 use super::*;
2883
// The tool catalogue must advertise `browser_query`.
#[test]
fn tool_defs_include_browser_query() {
    let listed = TOOL_DEFINITIONS
        .iter()
        .filter_map(|tool| tool.get("name"))
        .any(|name| name == "browser_query");
    assert!(listed, "TOOL_DEFINITIONS must contain browser_query");
}
2893
// The tool catalogue must advertise `browser_extract`.
#[test]
fn tool_defs_include_browser_extract() {
    let listed = TOOL_DEFINITIONS
        .iter()
        .filter_map(|tool| tool.get("name"))
        .any(|name| name == "browser_extract");
    assert!(listed, "TOOL_DEFINITIONS must contain browser_extract");
}
2903
// The tool catalogue must advertise `browser_acquire_and_extract`.
#[test]
fn tool_defs_include_browser_acquire_and_extract() {
    let listed = TOOL_DEFINITIONS
        .iter()
        .filter_map(|tool| tool.get("name"))
        .any(|name| name == "browser_acquire_and_extract");
    assert!(
        listed,
        "TOOL_DEFINITIONS must contain browser_acquire_and_extract"
    );
}
2914
// The tool catalogue must advertise `browser_extract_with_fallback`.
#[test]
fn tool_defs_include_browser_extract_with_fallback() {
    let listed = TOOL_DEFINITIONS
        .iter()
        .filter_map(|tool| tool.get("name"))
        .any(|name| name == "browser_extract_with_fallback");
    assert!(
        listed,
        "TOOL_DEFINITIONS must contain browser_extract_with_fallback"
    );
}
2925
// The tool catalogue must advertise `browser_extract_resilient`.
#[test]
fn tool_defs_include_browser_extract_resilient() {
    let listed = TOOL_DEFINITIONS
        .iter()
        .filter_map(|tool| tool.get("name"))
        .any(|name| name == "browser_extract_resilient");
    assert!(
        listed,
        "TOOL_DEFINITIONS must contain browser_extract_resilient"
    );
}
2936
// `browser_extract_with_fallback` must list `root_selectors` among its
// required input-schema arguments.
#[test]
fn browser_extract_with_fallback_requires_root_selectors()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tool = TOOL_DEFINITIONS
        .iter()
        .find(|tool| {
            tool.get("name")
                .is_some_and(|name| name == "browser_extract_with_fallback")
        })
        .ok_or("browser_extract_with_fallback must be in TOOL_DEFINITIONS")?;

    let required = tool
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .and_then(Value::as_array)
        .ok_or("browser_extract_with_fallback inputSchema missing 'required' array")?;

    let has_root_selectors = required.iter().any(|arg| arg == "root_selectors");
    assert!(
        has_root_selectors,
        "root_selectors must be required in browser_extract_with_fallback"
    );
    Ok(())
}
2958
// `browser_query` must require `session_id`, `url` and `selector`.
#[test]
fn browser_query_required_args() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tool = TOOL_DEFINITIONS
        .iter()
        .find(|tool| tool.get("name").is_some_and(|name| name == "browser_query"))
        .ok_or("browser_query must be in TOOL_DEFINITIONS")?;

    let required = tool
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .ok_or("browser_query inputSchema missing 'required'")?;

    // A non-array `required` fails every assertion below.
    let listed = required.as_array();
    for arg in ["session_id", "url", "selector"] {
        assert!(listed.is_some_and(|names| names.iter().any(|name| name == arg)));
    }
    Ok(())
}
2988
// `browser_extract` must require `root_selector` and `schema`.
#[test]
fn browser_extract_required_args() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tool = TOOL_DEFINITIONS
        .iter()
        .find(|tool| tool.get("name").is_some_and(|name| name == "browser_extract"))
        .ok_or("browser_extract must be in TOOL_DEFINITIONS")?;

    let required = tool
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .ok_or("browser_extract inputSchema missing 'required'")?;

    // A non-array `required` fails every assertion below.
    let listed = required.as_array();
    for arg in ["root_selector", "schema"] {
        assert!(listed.is_some_and(|names| names.iter().any(|name| name == arg)));
    }
    Ok(())
}
3012
// `browser_acquire_and_extract` must require `url` and `mode`, and its `mode`
// enum must offer all four acquisition modes.
#[test]
fn browser_acquire_and_extract_required_args()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tool = TOOL_DEFINITIONS
        .iter()
        .find(|tool| {
            tool.get("name")
                .is_some_and(|name| name == "browser_acquire_and_extract")
        })
        .ok_or("browser_acquire_and_extract must be in TOOL_DEFINITIONS")?;
    let input_schema = tool.get("inputSchema");

    let required = input_schema
        .and_then(|schema| schema.get("required"))
        .and_then(Value::as_array)
        .ok_or("browser_acquire_and_extract inputSchema missing 'required' array")?;
    for arg in ["url", "mode"] {
        assert!(required.iter().any(|name| name == arg));
    }

    let mode_values = input_schema
        .and_then(|schema| schema.get("properties"))
        .and_then(|props| props.get("mode"))
        .and_then(|mode| mode.get("enum"))
        .and_then(Value::as_array)
        .ok_or("browser_acquire_and_extract mode enum missing")?;
    for mode in ["fast", "resilient", "hostile", "investigate"] {
        assert!(mode_values.iter().any(|value| value == mode));
    }
    Ok(())
}
3043
// Every supported mode name parses to its matching `AcquisitionMode`.
#[test]
fn acquisition_mode_parsing_accepts_all_supported_values()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let expectations = [
        ("fast", AcquisitionMode::Fast),
        ("resilient", AcquisitionMode::Resilient),
        ("hostile", AcquisitionMode::Hostile),
        ("investigate", AcquisitionMode::Investigate),
    ];
    for (raw, expected) in expectations {
        assert_eq!(McpBrowserServer::parse_acquisition_mode(raw)?, expected);
    }
    Ok(())
}
3065
// An unrecognised mode name is reported as an error.
#[test]
fn acquisition_mode_parsing_rejects_unknown() {
    let outcome = McpBrowserServer::parse_acquisition_mode("invalid");
    assert!(outcome.is_err(), "invalid mode should return an error");
}
3071
// A request without `url` is rejected.
#[test]
fn acquisition_request_validation_missing_url_fails() {
    let args = json!({"mode": "fast"});
    let outcome = McpBrowserServer::parse_acquisition_request(&args);
    assert!(outcome.is_err(), "missing url should fail validation");
}
3077
// A zero `total_timeout_secs` is rejected.
#[test]
fn acquisition_request_validation_invalid_timeout_fails() {
    let args = json!({
        "url": "https://example.com",
        "mode": "resilient",
        "total_timeout_secs": 0
    });
    let outcome = McpBrowserServer::parse_acquisition_request(&args);
    assert!(outcome.is_err(), "zero timeout should fail validation");
}
3088
// The tool payload keeps its top-level keys and exposes the diagnostics
// lists as arrays.
#[test]
fn acquisition_result_output_has_stable_top_level_shape() {
    let blocked = crate::StageFailure {
        strategy: crate::StrategyUsed::DirectHttp,
        kind: crate::StageFailureKind::Blocked,
        message: "blocked status".to_string(),
    };
    let result = AcquisitionResult {
        success: false,
        strategy_used: None,
        attempted: vec![crate::StrategyUsed::DirectHttp],
        final_url: Some("https://example.com".to_string()),
        status_code: Some(429),
        html_excerpt: Some("<html>blocked</html>".to_string()),
        extracted: None,
        failures: vec![blocked],
        timed_out: false,
    };

    let payload = McpBrowserServer::acquisition_result_to_tool_output(&result);

    let top_level_keys = [
        "success",
        "strategy_used",
        "final_url",
        "status_code",
        "html_excerpt",
        "diagnostics",
    ];
    for key in top_level_keys {
        assert!(payload.get(key).is_some());
    }

    let list_checks = [
        ("attempted", "diagnostics.attempted should be an array"),
        ("failures", "diagnostics.failures should be an array"),
    ];
    for (key, message) in list_checks {
        let is_array = payload
            .get("diagnostics")
            .and_then(|diagnostics| diagnostics.get(key))
            .is_some_and(Value::is_array);
        assert!(is_array, "{message}");
    }
}
3131
// A success response serializes its result and version and omits `error`.
#[test]
fn jsonrpc_response_ok_serializes() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let response = JsonRpcResponse::ok(json!(1), json!({ "hello": "world" }));
    let wire = serde_json::to_string(&response)?;

    for expected in ["\"hello\"", "\"jsonrpc\":\"2.0\""] {
        assert!(wire.contains(expected));
    }
    assert!(!wire.contains("\"error\""));
    Ok(())
}
3141
// An error response serializes its code and message and omits `result`.
#[test]
fn jsonrpc_response_err_serializes() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let response = JsonRpcResponse::err(json!(2), -32601, "Method not found");
    let wire = serde_json::to_string(&response)?;

    for expected in ["-32601", "Method not found"] {
        assert!(wire.contains(expected));
    }
    assert!(!wire.contains("\"result\""));
    Ok(())
}
3151
// `browser_extract` must require `schema` and declare it as an object.
#[test]
fn browser_extract_schema_parse_empty_schema()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let tool = TOOL_DEFINITIONS
        .iter()
        .find(|tool| tool.get("name").is_some_and(|name| name == "browser_extract"))
        .ok_or("browser_extract must be in TOOL_DEFINITIONS")?;
    let input_schema = tool.get("inputSchema");

    let required = input_schema
        .and_then(|schema| schema.get("required"))
        .and_then(Value::as_array)
        .ok_or("browser_extract inputSchema missing 'required' array")?;
    let schema_is_required = required.iter().any(|arg| arg == "schema");
    assert!(
        schema_is_required,
        "schema must be required in browser_extract"
    );

    let schema_type = input_schema
        .and_then(|schema| schema.get("properties"))
        .and_then(|props| props.get("schema"))
        .and_then(|prop| prop.get("type"))
        .and_then(Value::as_str)
        .ok_or("browser_extract inputSchema.properties.schema.type missing")?;
    assert_eq!(
        schema_type, "object",
        "schema property must have type object"
    );
    Ok(())
}
3185
// `browser_query` must list `session_id` as a required argument.
#[test]
fn browser_query_missing_session() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tool = TOOL_DEFINITIONS
        .iter()
        .find(|tool| tool.get("name").is_some_and(|name| name == "browser_query"))
        .ok_or("browser_query must be in TOOL_DEFINITIONS")?;

    let required = tool
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .and_then(Value::as_array)
        .ok_or("browser_query inputSchema missing 'required' array")?;

    let session_id_required = required.iter().any(|arg| arg == "session_id");
    assert!(
        session_id_required,
        "session_id must be required so missing-session is caught at validation"
    );
    Ok(())
}
3212
// Falsy, empty and unrecognised values all leave MCP disabled.
#[test]
fn mcp_env_disabled_by_default() {
    ["false", "0", "no", "", "off"]
        .into_iter()
        .for_each(|val| assert!(!mcp_enabled_from(val), "expected disabled for {val:?}"));
}
3221
// The accepted truthy spellings enable MCP regardless of letter case.
#[test]
fn mcp_env_enabled_values() {
    ["true", "True", "TRUE", "1", "yes", "YES"]
        .into_iter()
        .for_each(|val| assert!(mcp_enabled_from(val), "expected enabled for {val:?}"));
}
3229
// `browser_warmup` must be advertised and require `session_id` and `url`.
#[test]
fn browser_warmup_in_tool_definitions() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let tool = TOOL_DEFINITIONS
        .iter()
        .find(|tool| tool.get("name").is_some_and(|name| name == "browser_warmup"))
        .ok_or("browser_warmup must be in TOOL_DEFINITIONS")?;

    let required = tool
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .and_then(Value::as_array)
        .ok_or("browser_warmup inputSchema missing 'required' array")?;

    let requires = |arg: &str| required.iter().any(|name| name == arg);
    assert!(
        requires("session_id"),
        "session_id must be required in browser_warmup"
    );
    assert!(requires("url"), "url must be required in browser_warmup");
    Ok(())
}
3252
/// Verifies that `browser_refresh` is registered and that its schema requires
/// `session_id`.
///
/// Returns an error when the tool entry or its `required` array is missing.
#[test]
fn browser_refresh_in_tool_definitions() -> std::result::Result<(), Box<dyn std::error::Error>>
{
    // One chain: find the tool, then walk down to inputSchema.required.
    let required_args = TOOL_DEFINITIONS
        .iter()
        .find(|entry| entry.get("name").and_then(|name| name.as_str()) == Some("browser_refresh"))
        .ok_or("browser_refresh must be in TOOL_DEFINITIONS")?
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .and_then(|list| list.as_array())
        .ok_or("browser_refresh inputSchema missing 'required' array")?;

    let has_session_id = required_args
        .iter()
        .any(|arg| arg.as_str() == Some("session_id"));
    assert!(
        has_session_id,
        "session_id must be required in browser_refresh"
    );
    Ok(())
}
3272
/// Asserts that some entry in `TOOL_DEFINITIONS` is named
/// `browser_auth_session`.
#[test]
fn tool_defs_include_browser_auth_session() {
    // Entries without a string `name` are simply skipped.
    let registered = TOOL_DEFINITIONS
        .iter()
        .filter_map(|tool| tool.get("name")?.as_str())
        .any(|name| name == "browser_auth_session");
    assert!(
        registered,
        "TOOL_DEFINITIONS must contain browser_auth_session"
    );
}
3282
/// Verifies that the `browser_auth_session` schema requires both `session_id`
/// and `mode`.
///
/// Returns an error when the tool entry or its `required` array is missing.
#[test]
fn browser_auth_session_required_args() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let required_args = TOOL_DEFINITIONS
        .iter()
        .find(|entry| {
            entry.get("name").and_then(|name| name.as_str()) == Some("browser_auth_session")
        })
        .ok_or("browser_auth_session must be in TOOL_DEFINITIONS")?
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .and_then(|list| list.as_array())
        .ok_or("browser_auth_session inputSchema missing 'required' array")?;

    // Shared membership check for the two mandatory arguments below.
    let is_required = |arg: &str| required_args.iter().any(|v| v.as_str() == Some(arg));

    assert!(
        is_required("session_id"),
        "session_id must be required in browser_auth_session"
    );
    assert!(
        is_required("mode"),
        "mode must be required in browser_auth_session"
    );
    Ok(())
}
3306
/// Asserts that some entry in `TOOL_DEFINITIONS` is named
/// `browser_session_save`.
#[test]
fn tool_defs_include_browser_session_save() {
    let registered = TOOL_DEFINITIONS
        .iter()
        .map(|tool| tool.get("name").and_then(|name| name.as_str()))
        .any(|name| name == Some("browser_session_save"));
    assert!(
        registered,
        "TOOL_DEFINITIONS must contain browser_session_save"
    );
}
3316
/// Asserts that some entry in `TOOL_DEFINITIONS` is named
/// `browser_session_restore`.
#[test]
fn tool_defs_include_browser_session_restore() {
    let registered = TOOL_DEFINITIONS.iter().any(|tool| {
        tool.get("name")
            .and_then(|name| name.as_str())
            .is_some_and(|name| name == "browser_session_restore")
    });
    assert!(
        registered,
        "TOOL_DEFINITIONS must contain browser_session_restore"
    );
}
3326
/// Asserts that some entry in `TOOL_DEFINITIONS` is named `browser_humanize`.
#[test]
fn tool_defs_include_browser_humanize() {
    // Collect only the string-valued names, then look for the expected one.
    let mut tool_names = TOOL_DEFINITIONS
        .iter()
        .filter_map(|tool| tool.get("name")?.as_str());
    assert!(
        tool_names.any(|name| name == "browser_humanize"),
        "TOOL_DEFINITIONS must contain browser_humanize"
    );
}
3336
/// Asserts that some entry in `TOOL_DEFINITIONS` is named
/// `browser_apply_behavior_json`.
#[test]
fn tool_defs_include_browser_apply_behavior_json() {
    let wanted = Some("browser_apply_behavior_json");
    let registered = TOOL_DEFINITIONS
        .iter()
        .map(|tool| tool.get("name").and_then(|name| name.as_str()))
        .any(|name| name == wanted);
    assert!(
        registered,
        "TOOL_DEFINITIONS must contain browser_apply_behavior_json"
    );
}
3347
/// Verifies that the `browser_apply_behavior_json` schema lists `behavior` in
/// its `required` array.
///
/// Returns an error when the tool entry or its `required` array is missing.
#[test]
fn browser_apply_behavior_json_requires_behavior()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let Some(tool) = TOOL_DEFINITIONS.iter().find(|entry| {
        entry.get("name").and_then(|name| name.as_str()) == Some("browser_apply_behavior_json")
    }) else {
        return Err("browser_apply_behavior_json must be in TOOL_DEFINITIONS".into());
    };

    let Some(required_args) = tool
        .get("inputSchema")
        .and_then(|schema| schema.get("required"))
        .and_then(|list| list.as_array())
    else {
        return Err("browser_apply_behavior_json inputSchema missing required array".into());
    };

    let has_behavior = required_args
        .iter()
        .any(|arg| arg.as_str() == Some("behavior"));
    assert!(
        has_behavior,
        "behavior must be required in browser_apply_behavior_json"
    );
    Ok(())
}
3367
/// Asserts that some entry in `TOOL_DEFINITIONS` is named `browser_attach`.
/// Compiled only when the `mcp-attach` feature is enabled.
#[cfg(feature = "mcp-attach")]
#[test]
fn tool_defs_include_browser_attach() {
    let registered = TOOL_DEFINITIONS
        .iter()
        .filter_map(|tool| tool.get("name")?.as_str())
        .any(|name| name == "browser_attach");
    assert!(
        registered,
        "TOOL_DEFINITIONS must contain browser_attach when mcp-attach is enabled"
    );
}
3378
/// Verifies that the `browser_attach` schema exposes a `target_profile`
/// property whose `enum` contains both `default` and `reddit`.
/// Compiled only when the `mcp-attach` feature is enabled.
///
/// Returns an error when the tool entry, its `properties` object, the
/// `target_profile` property, or that property's `enum` array is missing.
#[cfg(feature = "mcp-attach")]
#[test]
fn browser_attach_schema_includes_target_profile()
-> std::result::Result<(), Box<dyn std::error::Error>> {
    let Some(tool) = TOOL_DEFINITIONS
        .iter()
        .find(|entry| entry.get("name").and_then(|name| name.as_str()) == Some("browser_attach"))
    else {
        return Err("browser_attach must be in TOOL_DEFINITIONS".into());
    };

    // `properties` must be present and must be a JSON object.
    let Some(properties) = tool
        .get("inputSchema")
        .and_then(|schema| schema.get("properties"))
        .and_then(|node| node.as_object())
    else {
        return Err("browser_attach inputSchema missing properties".into());
    };

    let Some(profile) = properties.get("target_profile") else {
        return Err("browser_attach inputSchema missing target_profile".into());
    };

    let Some(allowed) = profile.get("enum").and_then(|node| node.as_array()) else {
        return Err("browser_attach target_profile missing enum".into());
    };

    // Shared membership check for the two expected enum values below.
    let allows = |profile_name: &str| allowed.iter().any(|v| v.as_str() == Some(profile_name));

    assert!(
        allows("default"),
        "browser_attach target_profile enum must include default"
    );
    assert!(
        allows("reddit"),
        "browser_attach target_profile enum must include reddit"
    );
    Ok(())
}
3411}