diff --git a/tests/tools/test_browser_cdp_tool.py b/tests/tools/test_browser_cdp_tool.py new file mode 100644 index 000000000..e7e187ceb --- /dev/null +++ b/tests/tools/test_browser_cdp_tool.py @@ -0,0 +1,408 @@ +"""Unit tests for browser_cdp tool. + +Uses a tiny in-process ``websockets`` server to simulate a CDP endpoint — +gives real protocol coverage (connect, send, recv, close) without needing +a real Chrome instance. +""" +from __future__ import annotations + +import asyncio +import json +import threading +import time +from typing import Any, Dict, List + +import pytest + +import websockets +from websockets.asyncio.server import serve + +from tools import browser_cdp_tool + + +# --------------------------------------------------------------------------- +# In-process CDP mock server +# --------------------------------------------------------------------------- + + +class _CDPServer: + """A tiny CDP-over-WebSocket mock. + + Each client gets a greeting-free stream. The server replies to each + inbound request whose ``id`` is set, using the registered handler for + that method. If no handler is registered, returns a generic CDP error. 
+ """ + + def __init__(self) -> None: + self._handlers: Dict[str, Any] = {} + self._responses: List[Dict[str, Any]] = [] + self._loop: asyncio.AbstractEventLoop | None = None + self._server: Any = None + self._thread: threading.Thread | None = None + self._host = "127.0.0.1" + self._port = 0 + + # --- handler registration -------------------------------------------- + + def on(self, method: str, handler): + """Register a handler ``handler(params, session_id) -> dict or Exception``.""" + self._handlers[method] = handler + + # --- lifecycle ------------------------------------------------------- + + def start(self) -> str: + ready = threading.Event() + + def _run() -> None: + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) + + async def _handler(ws): + try: + async for raw in ws: + msg = json.loads(raw) + call_id = msg.get("id") + method = msg.get("method", "") + params = msg.get("params", {}) or {} + session_id = msg.get("sessionId") + self._responses.append(msg) + + fn = self._handlers.get(method) + if fn is None: + reply = { + "id": call_id, + "error": { + "code": -32601, + "message": f"No handler for {method}", + }, + } + else: + try: + result = fn(params, session_id) + if isinstance(result, Exception): + raise result + reply = {"id": call_id, "result": result} + except Exception as exc: + reply = { + "id": call_id, + "error": {"code": -1, "message": str(exc)}, + } + if session_id: + reply["sessionId"] = session_id + await ws.send(json.dumps(reply)) + except websockets.exceptions.ConnectionClosed: + pass + + async def _serve() -> None: + self._server = await serve(_handler, self._host, 0) + sock = next(iter(self._server.sockets)) + self._port = sock.getsockname()[1] + ready.set() + await self._server.wait_closed() + + try: + self._loop.run_until_complete(_serve()) + finally: + self._loop.close() + + self._thread = threading.Thread(target=_run, daemon=True) + self._thread.start() + if not ready.wait(timeout=5.0): + raise 
RuntimeError("CDP mock server failed to start within 5s") + return f"ws://{self._host}:{self._port}/devtools/browser/mock" + + def stop(self) -> None: + if self._loop and self._server: + def _close() -> None: + self._server.close() + + self._loop.call_soon_threadsafe(_close) + if self._thread: + self._thread.join(timeout=3.0) + + def received(self) -> List[Dict[str, Any]]: + return list(self._responses) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def cdp_server(monkeypatch): + """Start a CDP mock and route tool resolution to it.""" + server = _CDPServer() + ws_url = server.start() + monkeypatch.setattr( + browser_cdp_tool, "_resolve_cdp_endpoint", lambda: ws_url + ) + try: + yield server + finally: + server.stop() + + +# --------------------------------------------------------------------------- +# Input validation +# --------------------------------------------------------------------------- + + +def test_missing_method_returns_error(): + result = json.loads(browser_cdp_tool.browser_cdp(method="")) + assert "error" in result + assert "method" in result["error"].lower() + assert result.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL + + +def test_non_string_method_returns_error(): + result = json.loads(browser_cdp_tool.browser_cdp(method=123)) # type: ignore[arg-type] + assert "error" in result + assert "method" in result["error"].lower() + + +def test_non_dict_params_returns_error(monkeypatch): + monkeypatch.setattr( + browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "ws://localhost:9999" + ) + result = json.loads( + browser_cdp_tool.browser_cdp(method="Target.getTargets", params="not-a-dict") # type: ignore[arg-type] + ) + assert "error" in result + assert "object" in result["error"].lower() or "dict" in result["error"].lower() + + +# --------------------------------------------------------------------------- 
+# Endpoint resolution +# --------------------------------------------------------------------------- + + +def test_no_endpoint_returns_helpful_error(monkeypatch): + monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "") + result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets")) + assert "error" in result + assert "/browser connect" in result["error"] + assert result.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL + + +def test_non_ws_endpoint_returns_error(monkeypatch): + monkeypatch.setattr( + browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "http://localhost:9222" + ) + result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets")) + assert "error" in result + assert "WebSocket" in result["error"] + + +def test_websockets_missing_returns_error(monkeypatch): + monkeypatch.setattr(browser_cdp_tool, "_WS_AVAILABLE", False) + result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets")) + assert "error" in result + assert "websockets" in result["error"].lower() + + +# --------------------------------------------------------------------------- +# Happy-path: browser-level call +# --------------------------------------------------------------------------- + + +def test_browser_level_success(cdp_server): + cdp_server.on( + "Target.getTargets", + lambda params, sid: { + "targetInfos": [ + {"targetId": "A", "type": "page", "title": "Tab 1", "url": "about:blank"}, + {"targetId": "B", "type": "page", "title": "Tab 2", "url": "https://a.test"}, + ] + }, + ) + result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets")) + assert result["success"] is True + assert result["method"] == "Target.getTargets" + assert "target_id" not in result + assert len(result["result"]["targetInfos"]) == 2 + # Verify the server actually received exactly one call (no extra traffic) + calls = cdp_server.received() + assert len(calls) == 1 + assert calls[0]["method"] == "Target.getTargets" + assert 
"sessionId" not in calls[0] + + +def test_empty_params_sends_empty_object(cdp_server): + cdp_server.on("Browser.getVersion", lambda params, sid: {"product": "Mock/1.0"}) + json.loads(browser_cdp_tool.browser_cdp(method="Browser.getVersion")) + assert cdp_server.received()[0]["params"] == {} + + +# --------------------------------------------------------------------------- +# Happy-path: target-attached call +# --------------------------------------------------------------------------- + + +def test_target_attach_then_call(cdp_server): + cdp_server.on( + "Target.attachToTarget", + lambda params, sid: {"sessionId": f"sess-{params['targetId']}"}, + ) + cdp_server.on( + "Runtime.evaluate", + lambda params, sid: { + "result": {"type": "string", "value": f"evaluated[{sid}]"}, + }, + ) + result = json.loads( + browser_cdp_tool.browser_cdp( + method="Runtime.evaluate", + params={"expression": "document.title", "returnByValue": True}, + target_id="tab-A", + ) + ) + assert result["success"] is True + assert result["target_id"] == "tab-A" + assert result["result"]["result"]["value"] == "evaluated[sess-tab-A]" + + calls = cdp_server.received() + # First call: attach + assert calls[0]["method"] == "Target.attachToTarget" + assert calls[0]["params"] == {"targetId": "tab-A", "flatten": True} + # Second call: dispatched method on the session + assert calls[1]["method"] == "Runtime.evaluate" + assert calls[1]["sessionId"] == "sess-tab-A" + + +# --------------------------------------------------------------------------- +# CDP error responses +# --------------------------------------------------------------------------- + + +def test_cdp_method_error_returns_tool_error(cdp_server): + # No handler registered -> server returns CDP error + result = json.loads( + browser_cdp_tool.browser_cdp(method="NonExistent.method") + ) + assert "error" in result + assert "CDP error" in result["error"] + assert result.get("method") == "NonExistent.method" + + +def 
test_attach_failure_returns_tool_error(cdp_server): + # Target.attachToTarget has no handler -> server errors on attach + result = json.loads( + browser_cdp_tool.browser_cdp( + method="Runtime.evaluate", + params={"expression": "1+1"}, + target_id="missing", + ) + ) + assert "error" in result + assert "Target.attachToTarget" in result["error"] + + +# --------------------------------------------------------------------------- +# Timeouts +# --------------------------------------------------------------------------- + + +def test_timeout_when_server_never_replies(cdp_server): + # Register a handler that blocks forever + def slow(params, sid): + time.sleep(10) + return {} + + cdp_server.on("Page.slowMethod", slow) + result = json.loads( + browser_cdp_tool.browser_cdp( + method="Page.slowMethod", timeout=0.5 + ) + ) + assert "error" in result + assert "tim" in result["error"].lower() + + +# --------------------------------------------------------------------------- +# Timeout clamping +# --------------------------------------------------------------------------- + + +def test_timeout_clamped_above_max(cdp_server): + cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"}) + # timeout=10_000 should be clamped to 300 but still succeed + result = json.loads( + browser_cdp_tool.browser_cdp(method="Browser.getVersion", timeout=10_000) + ) + assert result["success"] is True + + +def test_invalid_timeout_falls_back_to_default(cdp_server): + cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"}) + result = json.loads( + browser_cdp_tool.browser_cdp(method="Browser.getVersion", timeout="nope") # type: ignore[arg-type] + ) + assert result["success"] is True + + +# --------------------------------------------------------------------------- +# Registry integration +# --------------------------------------------------------------------------- + + +def test_registered_in_browser_toolset(): + from tools.registry import registry + + entry = 
registry.get_entry("browser_cdp") + assert entry is not None + assert entry.toolset == "browser" + assert entry.schema["name"] == "browser_cdp" + assert entry.schema["parameters"]["required"] == ["method"] + assert "Chrome DevTools Protocol" in entry.schema["description"] + assert browser_cdp_tool.CDP_DOCS_URL in entry.schema["description"] + + +def test_dispatch_through_registry(cdp_server): + from tools.registry import registry + + cdp_server.on("Target.getTargets", lambda p, s: {"targetInfos": []}) + raw = registry.dispatch( + "browser_cdp", {"method": "Target.getTargets"}, task_id="t1" + ) + result = json.loads(raw) + assert result["success"] is True + assert result["method"] == "Target.getTargets" + + +# --------------------------------------------------------------------------- +# check_fn gating +# --------------------------------------------------------------------------- + + +def test_check_fn_false_when_no_cdp_url(monkeypatch): + """Gate closes when no CDP URL is set — even if the browser toolset is + otherwise configured.""" + import tools.browser_tool as bt + + monkeypatch.setattr(bt, "check_browser_requirements", lambda: True) + monkeypatch.setattr(bt, "_get_cdp_override", lambda: "") + assert browser_cdp_tool._browser_cdp_check() is False + + +def test_check_fn_true_when_cdp_url_set(monkeypatch): + """Gate opens as soon as a CDP URL is resolvable.""" + import tools.browser_tool as bt + + monkeypatch.setattr(bt, "check_browser_requirements", lambda: True) + monkeypatch.setattr( + bt, "_get_cdp_override", lambda: "ws://localhost:9222/devtools/browser/x" + ) + assert browser_cdp_tool._browser_cdp_check() is True + + +def test_check_fn_false_when_browser_requirements_fail(monkeypatch): + """Even with a CDP URL, gate closes if the overall browser toolset is + unavailable (e.g. 
agent-browser not installed).""" + import tools.browser_tool as bt + + monkeypatch.setattr(bt, "check_browser_requirements", lambda: False) + monkeypatch.setattr( + bt, "_get_cdp_override", lambda: "ws://localhost:9222/devtools/browser/x" + ) + assert browser_cdp_tool._browser_cdp_check() is False diff --git a/tools/browser_cdp_tool.py b/tools/browser_cdp_tool.py new file mode 100644 index 000000000..7817b9c35 --- /dev/null +++ b/tools/browser_cdp_tool.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python3 +""" +Raw Chrome DevTools Protocol (CDP) passthrough tool. + +Exposes a single tool, ``browser_cdp``, that sends arbitrary CDP commands to +the browser's DevTools WebSocket endpoint. Works when a CDP URL is +configured — either via ``/browser connect`` (sets ``BROWSER_CDP_URL``) or +``browser.cdp_url`` in ``config.yaml`` — or when a CDP-backed cloud provider +session is active. + +This is the escape hatch for browser operations not covered by the main +browser tool surface (``browser_navigate``, ``browser_click``, +``browser_console``, etc.) — handling native dialogs, iframe-scoped +evaluation, cookie/network control, low-level tab management, etc. + +Method reference: https://chromedevtools.github.io/devtools-protocol/ +""" +from __future__ import annotations + +import asyncio +import json +import logging +import os +from typing import Any, Dict, Optional + +from tools.registry import registry, tool_error + +logger = logging.getLogger(__name__) + +CDP_DOCS_URL = "https://chromedevtools.github.io/devtools-protocol/" + +# ``websockets`` is a transitive dependency of hermes-agent (via fal_client +# and firecrawl-py) and is already imported by gateway/platforms/feishu.py. +# Wrap the import so a clean error surfaces if the package is ever absent. 
try:
    import websockets
    from websockets.exceptions import WebSocketException

    _WS_AVAILABLE = True
except ImportError:
    # Keep the module importable without the package; the fallbacks below let
    # ``except WebSocketException`` clauses and ``_WS_AVAILABLE`` checks work.
    websockets = None  # type: ignore[assignment]
    WebSocketException = Exception  # type: ignore[assignment,misc]
    _WS_AVAILABLE = False


# ---------------------------------------------------------------------------
# Async-from-sync bridge (matches the pattern in homeassistant_tool.py)
# ---------------------------------------------------------------------------


def _run_async(coro):
    """Run ``coro`` to completion from synchronous code and return its result.

    Works whether or not an event loop is already running in the calling
    thread:

    * no running loop  -> ``asyncio.run(coro)`` directly;
    * running loop     -> ``asyncio.run(coro)`` on a one-off worker thread,
      because ``asyncio.run`` refuses to nest inside a running loop. The
      calling thread blocks until the worker finishes.

    Exceptions raised by the coroutine propagate to the caller in both cases.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so asyncio.run can own one.
        return asyncio.run(coro)

    # ``get_running_loop`` only succeeds for a loop that is actually running,
    # so reaching this point means we must hop to another thread.
    import concurrent.futures

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


# ---------------------------------------------------------------------------
# Endpoint resolution
# ---------------------------------------------------------------------------


def _resolve_cdp_endpoint() -> str:
    """Return the normalized CDP WebSocket URL, or empty string if unavailable.

    Delegates to ``tools.browser_tool._get_cdp_override`` so precedence stays
    consistent with the rest of the browser tool surface:

    1. ``BROWSER_CDP_URL`` env var (live override from ``/browser connect``)
    2. ``browser.cdp_url`` in ``config.yaml``

    Any failure while importing or calling the resolver is logged at debug
    level and reported as "no endpoint" (empty string) rather than raised.
    """
    try:
        # Imported lazily so a problem in browser_tool degrades to "no
        # endpoint" instead of breaking import of this module.
        from tools.browser_tool import _get_cdp_override  # type: ignore[import-not-found]

        return (_get_cdp_override() or "").strip()
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("browser_cdp: failed to resolve CDP endpoint: %s", exc)
        return ""


# ---------------------------------------------------------------------------
# Core CDP call
# ---------------------------------------------------------------------------


async def _await_cdp_response(
    ws: Any,
    expected_id: int,
    timeout: float,
    timeout_message: str,
) -> Dict[str, Any]:
    """Read frames from ``ws`` until the reply carrying ``expected_id`` arrives.

    Frames that are not JSON objects, events (no ``id``) and replies to other
    ids are skipped. The matching message is returned whole, so the caller
    decides how to treat its ``error`` / ``result`` members.

    Args:
        ws: Connection object exposing an awaitable ``recv()``.
        expected_id: The ``id`` of the request being waited on.
        timeout: Total seconds to wait, measured across all skipped frames.
        timeout_message: Text for the ``TimeoutError`` raised when the
            deadline passes between frames.

    Raises:
        TimeoutError: The deadline passed before a matching reply was read.
        asyncio.TimeoutError: ``recv()`` itself did not complete within the
            remaining time (same class as ``TimeoutError`` on Python 3.11+).
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            raise TimeoutError(timeout_message)
        raw = await asyncio.wait_for(ws.recv(), timeout=remaining)
        msg = json.loads(raw)
        if isinstance(msg, dict) and msg.get("id") == expected_id:
            return msg
        # Anything else is an event or an unrelated reply — keep reading.


async def _cdp_call(
    ws_url: str,
    method: str,
    params: Dict[str, Any],
    target_id: Optional[str],
    timeout: float,
) -> Dict[str, Any]:
    """Make a single CDP call, optionally attaching to a target first.

    When ``target_id`` is provided, we call ``Target.attachToTarget`` with
    ``flatten=True`` to multiplex a page-level session over the same
    browser-level WebSocket, then send ``method`` with that ``sessionId``.
    When ``target_id`` is None, ``method`` is sent at browser level — which
    works for ``Target.*``, ``Browser.*``, ``Storage.*`` and a few other
    globally-scoped domains.

    ``timeout`` is applied to each phase separately (opening the socket, the
    optional attach, and the call itself), not to the three combined.

    Returns:
        The ``result`` object of the CDP reply (``{}`` when absent or null).

    Raises:
        RuntimeError: ``websockets`` is unavailable, the attach or the call
            returned a CDP ``error``, or the attach reply had no sessionId.
        TimeoutError / asyncio.TimeoutError: A phase exceeded ``timeout``.
    """
    if websockets is None:
        # Normally unreachable: the call-site checks _WS_AVAILABLE first. An
        # explicit raise (unlike ``assert``) still fires under ``python -O``.
        raise RuntimeError("The 'websockets' Python package is not available")

    async with websockets.connect(
        ws_url,
        max_size=None,  # CDP responses (e.g. DOM.getDocument) can be large
        open_timeout=timeout,
        close_timeout=5,
        ping_interval=None,  # CDP server doesn't expect pings
    ) as ws:
        next_id = 1
        session_id: Optional[str] = None

        # --- Step 1: attach to target if requested ---
        if target_id:
            attach_id = next_id
            next_id += 1
            await ws.send(
                json.dumps(
                    {
                        "id": attach_id,
                        "method": "Target.attachToTarget",
                        "params": {"targetId": target_id, "flatten": True},
                    }
                )
            )
            attach_reply = await _await_cdp_response(
                ws,
                attach_id,
                timeout,
                f"Timed out attaching to target {target_id}",
            )
            if "error" in attach_reply:
                raise RuntimeError(
                    f"Target.attachToTarget failed: {attach_reply['error']}"
                )
            # ``or {}`` tolerates an explicit JSON null result.
            session_id = (attach_reply.get("result") or {}).get("sessionId")
            if not session_id:
                raise RuntimeError(
                    "Target.attachToTarget did not return a sessionId"
                )

        # --- Step 2: dispatch the real method ---
        call_id = next_id
        req: Dict[str, Any] = {
            "id": call_id,
            "method": method,
            "params": params or {},
        }
        if session_id:
            req["sessionId"] = session_id
        await ws.send(json.dumps(req))

        reply = await _await_cdp_response(
            ws,
            call_id,
            timeout,
            f"Timed out waiting for response to {method}",
        )
        if "error" in reply:
            raise RuntimeError(f"CDP error: {reply['error']}")
        return reply.get("result") or {}


# ---------------------------------------------------------------------------
# Public tool function
#
--------------------------------------------------------------------------- + + +def browser_cdp( + method: str, + params: Optional[Dict[str, Any]] = None, + target_id: Optional[str] = None, + timeout: float = 30.0, + task_id: Optional[str] = None, +) -> str: + """Send a raw CDP command. See ``CDP_DOCS_URL`` for method documentation. + + Args: + method: CDP method name, e.g. ``"Target.getTargets"``. + params: Method-specific parameters; defaults to ``{}``. + target_id: Optional target/tab ID for page-level methods. When set, + we first attach to the target (``flatten=True``) and send + ``method`` with the resulting ``sessionId``. + timeout: Seconds to wait for the call to complete. + task_id: Unused (tool is stateless) — accepted for uniformity with + other browser tools. + + Returns: + JSON string ``{"success": True, "method": ..., "result": {...}}`` on + success, or ``{"error": "..."}`` on failure. + """ + del task_id # unused — stateless + + if not method or not isinstance(method, str): + return tool_error( + "'method' is required (e.g. 'Target.getTargets')", + cdp_docs=CDP_DOCS_URL, + ) + + if not _WS_AVAILABLE: + return tool_error( + "The 'websockets' Python package is required but not installed. " + "Install it with: pip install websockets" + ) + + endpoint = _resolve_cdp_endpoint() + if not endpoint: + return tool_error( + "No CDP endpoint is available. Run '/browser connect' to attach " + "to a running Chrome, or set 'browser.cdp_url' in config.yaml. " + "The Camofox backend is REST-only and does not expose CDP.", + cdp_docs=CDP_DOCS_URL, + ) + + if not endpoint.startswith(("ws://", "wss://")): + return tool_error( + f"CDP endpoint is not a WebSocket URL: {endpoint!r}. " + "Expected ws://... or wss://... — the /browser connect " + "resolver should have rewritten this. Check that Chrome is " + "actually listening on the debug port." 
+ ) + + call_params: Dict[str, Any] = params or {} + if not isinstance(call_params, dict): + return tool_error( + f"'params' must be an object/dict, got {type(call_params).__name__}" + ) + + try: + safe_timeout = float(timeout) if timeout else 30.0 + except (TypeError, ValueError): + safe_timeout = 30.0 + safe_timeout = max(1.0, min(safe_timeout, 300.0)) + + try: + result = _run_async( + _cdp_call(endpoint, method, call_params, target_id, safe_timeout) + ) + except asyncio.TimeoutError as exc: + return tool_error( + f"CDP call timed out after {safe_timeout}s: {exc}", + method=method, + ) + except TimeoutError as exc: + return tool_error(str(exc), method=method) + except RuntimeError as exc: + return tool_error(str(exc), method=method) + except WebSocketException as exc: + return tool_error( + f"WebSocket error talking to CDP at {endpoint}: {exc}. The " + "browser may have disconnected — try '/browser connect' again.", + method=method, + ) + except Exception as exc: # pragma: no cover — unexpected + logger.exception("browser_cdp unexpected error") + return tool_error( + f"Unexpected error: {type(exc).__name__}: {exc}", + method=method, + ) + + payload: Dict[str, Any] = { + "success": True, + "method": method, + "result": result, + } + if target_id: + payload["target_id"] = target_id + return json.dumps(payload, ensure_ascii=False) + + +# --------------------------------------------------------------------------- +# Registry +# --------------------------------------------------------------------------- + + +BROWSER_CDP_SCHEMA: Dict[str, Any] = { + "name": "browser_cdp", + "description": ( + "Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for " + "browser operations not covered by browser_navigate, browser_click, " + "browser_console, etc.\n\n" + "**Requires a reachable CDP endpoint.** Available when the user has " + "run '/browser connect' to attach to a running Chrome, or when " + "'browser.cdp_url' is set in config.yaml. 
Not currently wired up for " + "cloud backends (Browserbase, Browser Use, Firecrawl) — those expose " + "CDP per session but live-session routing is a follow-up. Camofox is " + "REST-only and will never support CDP. If the tool is in your toolset " + "at all, a CDP endpoint is already reachable.\n\n" + f"**CDP method reference:** {CDP_DOCS_URL} — use web_extract on a " + "method's URL (e.g. '/tot/Page/#method-handleJavaScriptDialog') " + "to look up parameters and return shape.\n\n" + "**Common patterns:**\n" + "- List tabs: method='Target.getTargets', params={}\n" + "- Handle a native JS dialog: method='Page.handleJavaScriptDialog', " + "params={'accept': true, 'promptText': ''}, target_id=\n" + "- Get all cookies: method='Network.getAllCookies', params={}\n" + "- Eval in a specific tab: method='Runtime.evaluate', " + "params={'expression': '...', 'returnByValue': true}, " + "target_id=\n" + "- Set viewport for a tab: method='Emulation.setDeviceMetricsOverride', " + "params={'width': 1280, 'height': 720, 'deviceScaleFactor': 1, " + "'mobile': false}, target_id=\n\n" + "**Usage rules:**\n" + "- Browser-level methods (Target.*, Browser.*, Storage.*): omit " + "target_id.\n" + "- Page-level methods (Page.*, Runtime.*, DOM.*, Emulation.*, " + "Network.* scoped to a tab): pass target_id from Target.getTargets.\n" + "- Each call is independent — sessions and event subscriptions do " + "not persist between calls. For stateful workflows, prefer the " + "dedicated browser tools." + ), + "parameters": { + "type": "object", + "properties": { + "method": { + "type": "string", + "description": ( + "CDP method name, e.g. 'Target.getTargets', " + "'Runtime.evaluate', 'Page.handleJavaScriptDialog'." + ), + }, + "params": { + "type": "object", + "description": ( + "Method-specific parameters as a JSON object. Omit or " + "pass {} for methods that take no parameters." + ), + "additionalProperties": True, + }, + "target_id": { + "type": "string", + "description": ( + "Optional. 
Target/tab ID from Target.getTargets result " + "(each entry's 'targetId'). Required for page-level " + "methods; must be omitted for browser-level methods." + ), + }, + "timeout": { + "type": "number", + "description": ( + "Timeout in seconds (default 30, max 300)." + ), + "default": 30, + }, + }, + "required": ["method"], + }, +} + + +def _browser_cdp_check() -> bool: + """Availability check for browser_cdp. + + The tool is only offered when the Python side can actually reach a CDP + endpoint right now — meaning a static URL is set via ``/browser connect`` + (``BROWSER_CDP_URL``) or ``browser.cdp_url`` in ``config.yaml``. + + Backends that do *not* currently expose CDP to us — Camofox (REST-only), + the default local agent-browser mode (Playwright hides its internal CDP + port), and cloud providers whose per-session ``cdp_url`` is not yet + surfaced — are gated out so the model doesn't see a tool that would + reliably fail. Cloud-provider CDP routing is a follow-up. + + Kept in a thin wrapper so the registration statement stays at module top + level (the tool-discovery AST scan only picks up top-level + ``registry.register(...)`` calls). 
+ """ + try: + from tools.browser_tool import ( # type: ignore[import-not-found] + _get_cdp_override, + check_browser_requirements, + ) + except ImportError as exc: # pragma: no cover — defensive + logger.debug("browser_cdp check: browser_tool import failed: %s", exc) + return False + if not check_browser_requirements(): + return False + return bool(_get_cdp_override()) + + +registry.register( + name="browser_cdp", + toolset="browser", + schema=BROWSER_CDP_SCHEMA, + handler=lambda args, **kw: browser_cdp( + method=args.get("method", ""), + params=args.get("params"), + target_id=args.get("target_id"), + timeout=args.get("timeout", 30.0), + task_id=kw.get("task_id"), + ), + check_fn=_browser_cdp_check, + emoji="🧪", +) diff --git a/toolsets.py b/toolsets.py index 6ac8d0782..d9f353e1f 100644 --- a/toolsets.py +++ b/toolsets.py @@ -43,7 +43,7 @@ _HERMES_CORE_TOOLS = [ "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", + "browser_vision", "browser_console", "browser_cdp", # Text-to-speech "text_to_speech", # Planning & memory @@ -115,7 +115,7 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", "web_search" + "browser_vision", "browser_console", "browser_cdp", "web_search" ], "includes": [] }, @@ -249,7 +249,7 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", + "browser_vision", "browser_console", "browser_cdp", "todo", "memory", "session_search", "execute_code", "delegate_task", @@ -274,7 +274,7 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", 
"browser_get_images", - "browser_vision", "browser_console", + "browser_vision", "browser_console", "browser_cdp", # Planning & memory "todo", "memory", # Session history search diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md index 40d44627e..c255c8f6a 100644 --- a/website/docs/reference/tools-reference.md +++ b/website/docs/reference/tools-reference.md @@ -6,9 +6,9 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool # Built-in Tools Reference -This page documents all 52 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets. +This page documents all 53 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets. -**Quick counts:** 10 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets. +**Quick counts:** 11 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets. :::tip MCP Tools In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration. @@ -19,6 +19,7 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server | Tool | Description | Requires environment | |------|-------------|----------------------| | `browser_back` | Navigate back to the previous page in browser history. Requires browser_navigate to be called first. | — | +| `browser_cdp` | Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for browser operations not covered by browser_navigate, browser_click, browser_console, etc. 
Only available when a CDP endpoint is reachable at session start — via `/browser connect` or `browser.cdp_url` config. See https://chromedevtools.github.io/devtools-protocol/ | — | | `browser_click` | Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first. | — | | `browser_console` | Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requi… | — | | `browser_get_images` | Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first. | — | diff --git a/website/docs/reference/toolsets-reference.md b/website/docs/reference/toolsets-reference.md index 7593a3fdc..bb911004e 100644 --- a/website/docs/reference/toolsets-reference.md +++ b/website/docs/reference/toolsets-reference.md @@ -52,7 +52,7 @@ Or in-session: | Toolset | Tools | Purpose | |---------|-------|---------| -| `browser` | `browser_back`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. | +| `browser` | `browser_back`, `browser_cdp`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. `browser_cdp` is a raw CDP passthrough gated on a reachable CDP endpoint — it only appears when `/browser connect` is active or `browser.cdp_url` is set. 
| | `clarify` | `clarify` | Ask the user a question when the agent needs clarification. | | `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. | | `cronjob` | `cronjob` | Schedule and manage recurring tasks. | diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md index 5b2462d2e..d6624bf7d 100644 --- a/website/docs/user-guide/features/browser.md +++ b/website/docs/user-guide/features/browser.md @@ -327,6 +327,36 @@ Check the browser console for any JavaScript errors Use `clear=True` to clear the console after reading, so subsequent calls only show new messages. +### `browser_cdp` + +Raw Chrome DevTools Protocol passthrough — the escape hatch for browser operations not covered by the other tools. Use for native dialog handling, iframe-scoped evaluation, cookie/network control, or any CDP verb the agent needs. + +**Only available when a CDP endpoint is reachable at session start** — meaning `/browser connect` has attached to a running Chrome, or `browser.cdp_url` is set in `config.yaml`. The default local agent-browser mode, Camofox, and cloud providers (Browserbase, Browser Use, Firecrawl) do not currently expose CDP to this tool — cloud providers have per-session CDP URLs but live-session routing is a follow-up. + +**CDP method reference:** https://chromedevtools.github.io/devtools-protocol/ — the agent can `web_extract` a specific method's page to look up parameters and return shape. 
+ +Common patterns: + +``` +# List tabs (browser-level, no target_id) +browser_cdp(method="Target.getTargets") + +# Handle a native JS dialog on a tab +browser_cdp(method="Page.handleJavaScriptDialog", + params={"accept": true, "promptText": ""}, + target_id="<targetId>") + +# Evaluate JS in a specific tab +browser_cdp(method="Runtime.evaluate", + params={"expression": "document.title", "returnByValue": true}, + target_id="<targetId>") + +# Get all cookies +browser_cdp(method="Network.getAllCookies") +``` + +Browser-level methods (`Target.*`, `Browser.*`, `Storage.*`) omit `target_id`. Page-level methods (`Page.*`, `Runtime.*`, `DOM.*`, `Emulation.*`) require a `target_id` from `Target.getTargets`. Each call is independent — sessions do not persist between calls. + ## Practical Examples ### Filling Out a Web Form