feat(browser): add browser_cdp raw DevTools Protocol passthrough (#12369)

Agents can now send arbitrary CDP commands to the browser. The tool is
gated on a reachable CDP endpoint at session start — it only appears in
the toolset when BROWSER_CDP_URL is set (from '/browser connect') or
'browser.cdp_url' is configured in config.yaml. Backends that don't
currently expose CDP to the Python side (Camofox, default local
agent-browser, cloud providers whose per-session cdp_url is not yet
surfaced) do not see the tool at all.

Tool schema description links to the CDP method reference at
https://chromedevtools.github.io/devtools-protocol/ so the agent can
web_extract specific method docs on demand.

Stateless per call. Browser-level methods (Target.*, Browser.*,
Storage.*) omit target_id. Page-level methods attach to the target
with flatten=true and dispatch the method on the returned sessionId.
Clean errors when the endpoint becomes unreachable mid-session or
the URL isn't a WebSocket.

Tests: 19 unit (mock CDP server + gate checks) + E2E against real
headless Chrome (Target.getTargets, Browser.getVersion,
Runtime.evaluate with target_id, Page.navigate + re-eval, bogus
method, bogus target_id, missing endpoint) + E2E of the check_fn
gate (tool hidden without CDP URL, visible with it, hidden again
after unset).
This commit is contained in:
Teknium 2026-04-19 00:03:10 -07:00 committed by GitHub
parent d66414a844
commit ce410521b3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 862 additions and 7 deletions

View file

@ -0,0 +1,408 @@
"""Unit tests for browser_cdp tool.
Uses a tiny in-process ``websockets`` server to simulate a CDP endpoint —
this gives real protocol coverage (connect, send, recv, close) without
needing a real Chrome instance.
"""
from __future__ import annotations
import asyncio
import json
import threading
import time
from typing import Any, Dict, List
import pytest
import websockets
from websockets.asyncio.server import serve
from tools import browser_cdp_tool
# ---------------------------------------------------------------------------
# In-process CDP mock server
# ---------------------------------------------------------------------------
class _CDPServer:
    """Minimal CDP-over-WebSocket stand-in running on a background thread.

    No greeting is sent on connect. Every inbound JSON message is recorded
    and answered with a reply that echoes its ``id`` (and ``sessionId`` when
    present). The reply body comes from the handler registered for the
    message's ``method``; a method without a handler gets a generic
    ``-32601`` CDP error.
    """

    def __init__(self) -> None:
        self._handlers: Dict[str, Any] = {}
        # Every message received from clients, in arrival order.
        self._requests_seen: List[Dict[str, Any]] = []
        self._loop: asyncio.AbstractEventLoop | None = None
        self._server: Any = None
        self._thread: threading.Thread | None = None
        self._host = "127.0.0.1"
        self._port = 0  # replaced by the OS-assigned port once serving

    # --- handler registration --------------------------------------------
    def on(self, method: str, handler):
        """Register ``handler(params, session_id) -> dict or Exception`` for ``method``."""
        self._handlers[method] = handler

    # --- message handling ------------------------------------------------
    def _process(self, msg):
        """Record one decoded request and build the reply dict for it."""
        call_id = msg.get("id")
        method = msg.get("method", "")
        params = msg.get("params", {}) or {}
        session_id = msg.get("sessionId")
        self._requests_seen.append(msg)

        handler = self._handlers.get(method)
        if handler is None:
            reply = {
                "id": call_id,
                "error": {
                    "code": -32601,
                    "message": f"No handler for {method}",
                },
            }
        else:
            try:
                outcome = handler(params, session_id)
                # Handlers may return an exception instead of raising it.
                if isinstance(outcome, Exception):
                    raise outcome
                reply = {"id": call_id, "result": outcome}
            except Exception as exc:
                reply = {
                    "id": call_id,
                    "error": {"code": -1, "message": str(exc)},
                }
        if session_id:
            reply["sessionId"] = session_id
        return reply

    async def _handle_connection(self, ws) -> None:
        """Answer every frame on one client connection until it closes."""
        try:
            async for raw in ws:
                await ws.send(json.dumps(self._process(json.loads(raw))))
        except websockets.exceptions.ConnectionClosed:
            pass

    # --- lifecycle -------------------------------------------------------
    async def _serve_until_closed(self, ready: threading.Event) -> None:
        """Bind to an ephemeral port, signal readiness, then serve until closed."""
        self._server = await serve(self._handle_connection, self._host, 0)
        bound_socket = next(iter(self._server.sockets))
        self._port = bound_socket.getsockname()[1]
        ready.set()
        await self._server.wait_closed()

    def _thread_main(self, ready: threading.Event) -> None:
        """Thread entry point: own a private event loop for the server's lifetime."""
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)
        try:
            self._loop.run_until_complete(self._serve_until_closed(ready))
        finally:
            self._loop.close()

    def start(self) -> str:
        """Launch the server thread and return the WebSocket URL to connect to.

        Raises:
            RuntimeError: if the server has not bound its port within 5 seconds.
        """
        ready = threading.Event()
        self._thread = threading.Thread(
            target=self._thread_main, args=(ready,), daemon=True
        )
        self._thread.start()
        if not ready.wait(timeout=5.0):
            raise RuntimeError("CDP mock server failed to start within 5s")
        return f"ws://{self._host}:{self._port}/devtools/browser/mock"

    def stop(self) -> None:
        """Ask the server to close and wait up to 3 seconds for its thread."""
        if self._loop and self._server:
            # close() must run on the server's own loop, not the caller's thread.
            self._loop.call_soon_threadsafe(self._server.close)
        if self._thread:
            self._thread.join(timeout=3.0)

    def received(self) -> List[Dict[str, Any]]:
        """Return a copy of every request received so far, in arrival order."""
        return list(self._requests_seen)
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def cdp_server(monkeypatch):
    """Yield a running CDP mock that the tool resolves as its endpoint.

    The mock is stopped when the test finishes, whether it passed or failed.
    """
    mock = _CDPServer()
    mock_url = mock.start()

    def _fixed_endpoint():
        return mock_url

    monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", _fixed_endpoint)
    try:
        yield mock
    finally:
        mock.stop()
# ---------------------------------------------------------------------------
# Input validation
# ---------------------------------------------------------------------------
def test_missing_method_returns_error():
    """An empty method name is rejected and the docs URL is attached."""
    payload = json.loads(browser_cdp_tool.browser_cdp(method=""))

    assert "error" in payload
    assert "method" in payload["error"].lower()
    assert payload.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL
def test_non_string_method_returns_error():
    """A method that is not a string is rejected with a message naming 'method'."""
    raw = browser_cdp_tool.browser_cdp(method=123)  # type: ignore[arg-type]
    payload = json.loads(raw)

    assert "error" in payload
    assert "method" in payload["error"].lower()
def test_non_dict_params_returns_error(monkeypatch):
    """Params that are not a dict are rejected before any connection is made."""
    # A syntactically valid endpoint lets the call get as far as params validation.
    monkeypatch.setattr(
        browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "ws://localhost:9999"
    )

    raw = browser_cdp_tool.browser_cdp(
        method="Target.getTargets",
        params="not-a-dict",  # type: ignore[arg-type]
    )
    payload = json.loads(raw)

    assert "error" in payload
    message = payload["error"].lower()
    assert "object" in message or "dict" in message
# ---------------------------------------------------------------------------
# Endpoint resolution
# ---------------------------------------------------------------------------
def test_no_endpoint_returns_helpful_error(monkeypatch):
    """With no endpoint resolvable, the error points the user at '/browser connect'."""
    monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "")

    payload = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))

    assert "error" in payload
    assert "/browser connect" in payload["error"]
    assert payload.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL
def test_non_ws_endpoint_returns_error(monkeypatch):
    """An http:// endpoint is refused with an error mentioning WebSocket."""
    monkeypatch.setattr(
        browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "http://localhost:9222"
    )

    payload = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))

    assert "error" in payload
    assert "WebSocket" in payload["error"]
def test_websockets_missing_returns_error(monkeypatch):
    """When the websockets package is flagged unavailable, the error names it."""
    monkeypatch.setattr(browser_cdp_tool, "_WS_AVAILABLE", False)

    payload = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))

    assert "error" in payload
    assert "websockets" in payload["error"].lower()
# ---------------------------------------------------------------------------
# Happy-path: browser-level call
# ---------------------------------------------------------------------------
def test_browser_level_success(cdp_server):
    """A browser-level call returns the result and sends exactly one request."""
    tabs = [
        {"targetId": "A", "type": "page", "title": "Tab 1", "url": "about:blank"},
        {"targetId": "B", "type": "page", "title": "Tab 2", "url": "https://a.test"},
    ]

    def list_targets(params, sid):
        return {"targetInfos": tabs}

    cdp_server.on("Target.getTargets", list_targets)

    payload = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))

    assert payload["success"] is True
    assert payload["method"] == "Target.getTargets"
    assert "target_id" not in payload
    assert len(payload["result"]["targetInfos"]) == 2

    # Verify the server actually received exactly one call (no extra traffic)
    requests = cdp_server.received()
    assert len(requests) == 1
    only_request = requests[0]
    assert only_request["method"] == "Target.getTargets"
    assert "sessionId" not in only_request
def test_empty_params_sends_empty_object(cdp_server):
    """Omitting ``params`` still sends an explicit empty object on the wire."""
    cdp_server.on("Browser.getVersion", lambda params, sid: {"product": "Mock/1.0"})

    result = json.loads(browser_cdp_tool.browser_cdp(method="Browser.getVersion"))

    # Check the call succeeded first, so a tool failure is reported as such
    # rather than as an IndexError on an empty request log below.
    assert result.get("success") is True, result
    assert cdp_server.received()[0]["params"] == {}
# ---------------------------------------------------------------------------
# Happy-path: target-attached call
# ---------------------------------------------------------------------------
def test_target_attach_then_call(cdp_server):
    """With target_id set, the tool attaches first and then calls on that session."""

    def attach(params, sid):
        return {"sessionId": f"sess-{params['targetId']}"}

    def evaluate(params, sid):
        return {"result": {"type": "string", "value": f"evaluated[{sid}]"}}

    cdp_server.on("Target.attachToTarget", attach)
    cdp_server.on("Runtime.evaluate", evaluate)

    raw = browser_cdp_tool.browser_cdp(
        method="Runtime.evaluate",
        params={"expression": "document.title", "returnByValue": True},
        target_id="tab-A",
    )
    payload = json.loads(raw)

    assert payload["success"] is True
    assert payload["target_id"] == "tab-A"
    assert payload["result"]["result"]["value"] == "evaluated[sess-tab-A]"

    attach_request, method_request = cdp_server.received()[:2]
    # First call: attach
    assert attach_request["method"] == "Target.attachToTarget"
    assert attach_request["params"] == {"targetId": "tab-A", "flatten": True}
    # Second call: dispatched method on the session
    assert method_request["method"] == "Runtime.evaluate"
    assert method_request["sessionId"] == "sess-tab-A"
# ---------------------------------------------------------------------------
# CDP error responses
# ---------------------------------------------------------------------------
def test_cdp_method_error_returns_tool_error(cdp_server):
    """A CDP-level error reply is surfaced as a tool error naming the method."""
    # No handler registered -> server returns CDP error
    raw = browser_cdp_tool.browser_cdp(method="NonExistent.method")
    payload = json.loads(raw)

    assert "error" in payload
    assert "CDP error" in payload["error"]
    assert payload.get("method") == "NonExistent.method"
def test_attach_failure_returns_tool_error(cdp_server):
    """If attaching to the target fails, the error mentions Target.attachToTarget."""
    # Target.attachToTarget has no handler -> server errors on attach
    raw = browser_cdp_tool.browser_cdp(
        method="Runtime.evaluate",
        params={"expression": "1+1"},
        target_id="missing",
    )
    payload = json.loads(raw)

    assert "error" in payload
    assert "Target.attachToTarget" in payload["error"]
# ---------------------------------------------------------------------------
# Timeouts
# ---------------------------------------------------------------------------
def test_timeout_when_server_never_replies(cdp_server):
    """A handler that does not answer in time makes the tool report a timeout."""
    # The mock invokes handlers synchronously on its event-loop thread. A bare
    # ``time.sleep(10)`` would keep that thread busy long after the client has
    # given up, so the fixture's ``stop()`` could not close the server and its
    # 3-second join would expire. Block on an event instead and release it as
    # soon as the tool call has returned.
    release = threading.Event()

    def slow(params, sid):
        release.wait(timeout=10)
        return {}

    cdp_server.on("Page.slowMethod", slow)
    try:
        result = json.loads(
            browser_cdp_tool.browser_cdp(method="Page.slowMethod", timeout=0.5)
        )
    finally:
        release.set()

    assert "error" in result
    assert "tim" in result["error"].lower()
# ---------------------------------------------------------------------------
# Timeout clamping
# ---------------------------------------------------------------------------
def test_timeout_clamped_above_max(cdp_server):
    """An oversized timeout does not stop the call from succeeding."""
    cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"})

    # timeout=10_000 should be clamped to 300 but still succeed
    raw = browser_cdp_tool.browser_cdp(method="Browser.getVersion", timeout=10_000)

    assert json.loads(raw)["success"] is True
def test_invalid_timeout_falls_back_to_default(cdp_server):
    """A non-numeric timeout is tolerated and the call still succeeds."""
    cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"})

    raw = browser_cdp_tool.browser_cdp(
        method="Browser.getVersion",
        timeout="nope",  # type: ignore[arg-type]
    )

    assert json.loads(raw)["success"] is True
# ---------------------------------------------------------------------------
# Registry integration
# ---------------------------------------------------------------------------
def test_registered_in_browser_toolset():
    """The tool is registered under the 'browser' toolset with the expected schema."""
    from tools.registry import registry

    entry = registry.get_entry("browser_cdp")
    assert entry is not None
    assert entry.toolset == "browser"

    schema = entry.schema
    assert schema["name"] == "browser_cdp"
    assert schema["parameters"]["required"] == ["method"]

    description = schema["description"]
    assert "Chrome DevTools Protocol" in description
    assert browser_cdp_tool.CDP_DOCS_URL in description
def test_dispatch_through_registry(cdp_server):
    """Dispatching by name through the registry reaches the tool handler."""
    from tools.registry import registry

    cdp_server.on("Target.getTargets", lambda p, s: {"targetInfos": []})

    arguments = {"method": "Target.getTargets"}
    payload = json.loads(registry.dispatch("browser_cdp", arguments, task_id="t1"))

    assert payload["success"] is True
    assert payload["method"] == "Target.getTargets"
# ---------------------------------------------------------------------------
# check_fn gating
# ---------------------------------------------------------------------------
def test_check_fn_false_when_no_cdp_url(monkeypatch):
    """The gate stays closed without a CDP URL, even when the browser toolset
    itself is otherwise usable."""
    import tools.browser_tool as browser_tool_module

    monkeypatch.setattr(
        browser_tool_module, "check_browser_requirements", lambda: True
    )
    monkeypatch.setattr(browser_tool_module, "_get_cdp_override", lambda: "")

    assert browser_cdp_tool._browser_cdp_check() is False
def test_check_fn_true_when_cdp_url_set(monkeypatch):
    """The gate opens once a CDP URL can be resolved."""
    import tools.browser_tool as browser_tool_module

    cdp_url = "ws://localhost:9222/devtools/browser/x"
    monkeypatch.setattr(
        browser_tool_module, "check_browser_requirements", lambda: True
    )
    monkeypatch.setattr(browser_tool_module, "_get_cdp_override", lambda: cdp_url)

    assert browser_cdp_tool._browser_cdp_check() is True
def test_check_fn_false_when_browser_requirements_fail(monkeypatch):
    """A CDP URL alone is not enough: the gate stays closed when the overall
    browser toolset is unavailable (e.g. agent-browser not installed)."""
    import tools.browser_tool as browser_tool_module

    cdp_url = "ws://localhost:9222/devtools/browser/x"
    monkeypatch.setattr(
        browser_tool_module, "check_browser_requirements", lambda: False
    )
    monkeypatch.setattr(browser_tool_module, "_get_cdp_override", lambda: cdp_url)

    assert browser_cdp_tool._browser_cdp_check() is False

416
tools/browser_cdp_tool.py Normal file
View file

@ -0,0 +1,416 @@
#!/usr/bin/env python3
"""
Raw Chrome DevTools Protocol (CDP) passthrough tool.
Exposes a single tool, ``browser_cdp``, that sends arbitrary CDP commands to
the browser's DevTools WebSocket endpoint. Works when a CDP URL is
configured, either via ``/browser connect`` (sets ``BROWSER_CDP_URL``) or via
``browser.cdp_url`` in ``config.yaml``. Cloud-provider sessions are not yet
routed to this tool.
This is the escape hatch for browser operations not covered by the main
browser tool surface (``browser_navigate``, ``browser_click``,
``browser_console``, etc.) — handling native dialogs, iframe-scoped
evaluation, cookie/network control, low-level tab management, etc.
Method reference: https://chromedevtools.github.io/devtools-protocol/
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
from typing import Any, Dict, Optional
from tools.registry import registry, tool_error
logger = logging.getLogger(__name__)
CDP_DOCS_URL = "https://chromedevtools.github.io/devtools-protocol/"
# ``websockets`` is a transitive dependency of hermes-agent (via fal_client
# and firecrawl-py) and is already imported by gateway/platforms/feishu.py.
# Wrap the import so a clean error surfaces if the package is ever absent.
try:
import websockets
from websockets.exceptions import WebSocketException
_WS_AVAILABLE = True
except ImportError:
websockets = None # type: ignore[assignment]
WebSocketException = Exception # type: ignore[assignment,misc]
_WS_AVAILABLE = False
# ---------------------------------------------------------------------------
# Async-from-sync bridge (matches the pattern in homeassistant_tool.py)
# ---------------------------------------------------------------------------
def _run_async(coro):
    """Drive ``coro`` to completion from synchronous code and return its result.

    Works whether or not the calling thread already has a running event
    loop: without one, ``asyncio.run`` is used directly; with one, the
    coroutine is run by ``asyncio.run`` on a one-off worker thread and this
    call blocks until it finishes.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so asyncio.run can create one here.
        return asyncio.run(coro)

    # asyncio.run refuses to start inside a running loop; give the coroutine
    # its own loop on a separate thread instead.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(asyncio.run, coro).result()
# ---------------------------------------------------------------------------
# Endpoint resolution
# ---------------------------------------------------------------------------
def _resolve_cdp_endpoint() -> str:
    """Look up the CDP WebSocket URL, returning ``""`` when none is available.

    Resolution is delegated to ``tools.browser_tool._get_cdp_override`` so
    the precedence matches the rest of the browser tool surface:

    1. ``BROWSER_CDP_URL`` env var (live override from ``/browser connect``)
    2. ``browser.cdp_url`` in ``config.yaml``

    Any failure while importing or calling the resolver is logged at debug
    level and treated as "no endpoint".
    """
    try:
        from tools.browser_tool import _get_cdp_override  # type: ignore[import-not-found]

        endpoint = (_get_cdp_override() or "").strip()
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("browser_cdp: failed to resolve CDP endpoint: %s", exc)
        return ""
    else:
        return endpoint
# ---------------------------------------------------------------------------
# Core CDP call
# ---------------------------------------------------------------------------
async def _await_cdp_reply(
    ws: Any,
    reply_id: int,
    timeout: float,
    timeout_message: str,
) -> Dict[str, Any]:
    """Read frames from ``ws`` until the reply whose ``id`` is ``reply_id``.

    Events (frames without an ``id``) and replies to other ids are skipped.

    Raises:
        TimeoutError: with ``timeout_message`` once ``timeout`` seconds have
            elapsed without a matching reply.
        asyncio.TimeoutError: if a single ``recv()`` outlives the time left.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            raise TimeoutError(timeout_message)
        raw = await asyncio.wait_for(ws.recv(), timeout=remaining)
        msg = json.loads(raw)
        # Only a JSON object can be a CDP reply; skip anything else.
        if isinstance(msg, dict) and msg.get("id") == reply_id:
            return msg


async def _cdp_call(
    ws_url: str,
    method: str,
    params: Dict[str, Any],
    target_id: Optional[str],
    timeout: float,
) -> Dict[str, Any]:
    """Make a single CDP call, optionally attaching to a target first.

    When ``target_id`` is provided, ``Target.attachToTarget`` is called with
    ``flatten=True`` to multiplex a page-level session over the same
    browser-level WebSocket, and ``method`` is then sent with that
    ``sessionId``. When ``target_id`` is None, ``method`` is sent at browser
    level, which works for ``Target.*``, ``Browser.*``, ``Storage.*`` and a
    few other globally-scoped domains.

    ``timeout`` is applied to each phase separately: opening the connection,
    waiting for the attach reply, and waiting for the method reply.

    Returns:
        The ``result`` object of the CDP reply (``{}`` if the reply has none).

    Raises:
        RuntimeError: the ``websockets`` package is unavailable, the attach
            step failed or returned no ``sessionId``, or the method reply
            carried a CDP ``error``.
        TimeoutError / asyncio.TimeoutError: a phase exceeded ``timeout``.
    """
    # An explicit check rather than ``assert``, which is stripped under -O.
    if websockets is None:
        raise RuntimeError("The 'websockets' Python package is not installed")

    async with websockets.connect(
        ws_url,
        max_size=None,  # CDP responses (e.g. DOM.getDocument) can be large
        open_timeout=timeout,
        close_timeout=5,
        ping_interval=None,  # CDP server doesn't expect pings
    ) as ws:
        next_id = 1
        session_id: Optional[str] = None

        # --- Step 1: attach to target if requested ---
        if target_id:
            attach_id = next_id
            next_id += 1
            await ws.send(
                json.dumps(
                    {
                        "id": attach_id,
                        "method": "Target.attachToTarget",
                        "params": {"targetId": target_id, "flatten": True},
                    }
                )
            )
            attach_reply = await _await_cdp_reply(
                ws,
                attach_id,
                timeout,
                f"Timed out attaching to target {target_id}",
            )
            if "error" in attach_reply:
                raise RuntimeError(
                    f"Target.attachToTarget failed: {attach_reply['error']}"
                )
            session_id = attach_reply.get("result", {}).get("sessionId")
            if not session_id:
                raise RuntimeError(
                    "Target.attachToTarget did not return a sessionId"
                )

        # --- Step 2: dispatch the real method ---
        call_id = next_id
        req: Dict[str, Any] = {
            "id": call_id,
            "method": method,
            "params": params or {},
        }
        if session_id:
            req["sessionId"] = session_id
        await ws.send(json.dumps(req))

        reply = await _await_cdp_reply(
            ws,
            call_id,
            timeout,
            f"Timed out waiting for response to {method}",
        )
        if "error" in reply:
            raise RuntimeError(f"CDP error: {reply['error']}")
        return reply.get("result", {})
# ---------------------------------------------------------------------------
# Public tool function
# ---------------------------------------------------------------------------
def browser_cdp(
    method: str,
    params: Optional[Dict[str, Any]] = None,
    target_id: Optional[str] = None,
    timeout: float = 30.0,
    task_id: Optional[str] = None,
) -> str:
    """Send a raw CDP command. See ``CDP_DOCS_URL`` for method documentation.

    Args:
        method: CDP method name, e.g. ``"Target.getTargets"``.
        params: Method-specific parameters; defaults to ``{}``.
        target_id: Optional target/tab ID for page-level methods. When set,
            we first attach to the target (``flatten=True``) and send
            ``method`` with the resulting ``sessionId``.
        timeout: Seconds to wait, clamped to the range 1-300. A missing,
            zero or non-numeric value falls back to 30.
        task_id: Unused (tool is stateless) — accepted for uniformity with
            other browser tools.

    Returns:
        JSON string ``{"success": True, "method": ..., "result": {...}}`` on
        success (plus ``target_id`` when one was given), or the
        ``tool_error`` payload on failure. Failures never raise.
    """
    del task_id  # unused — stateless

    if not method or not isinstance(method, str):
        return tool_error(
            "'method' is required (e.g. 'Target.getTargets')",
            cdp_docs=CDP_DOCS_URL,
        )

    if not _WS_AVAILABLE:
        return tool_error(
            "The 'websockets' Python package is required but not installed. "
            "Install it with: pip install websockets"
        )

    endpoint = _resolve_cdp_endpoint()
    if not endpoint:
        return tool_error(
            "No CDP endpoint is available. Run '/browser connect' to attach "
            "to a running Chrome, or set 'browser.cdp_url' in config.yaml. "
            "The Camofox backend is REST-only and does not expose CDP.",
            cdp_docs=CDP_DOCS_URL,
        )

    if not endpoint.startswith(("ws://", "wss://")):
        return tool_error(
            f"CDP endpoint is not a WebSocket URL: {endpoint!r}. "
            "Expected ws://... or wss://... — the /browser connect "
            "resolver should have rewritten this. Check that Chrome is "
            "actually listening on the debug port."
        )

    call_params: Dict[str, Any] = params or {}
    if not isinstance(call_params, dict):
        return tool_error(
            f"'params' must be an object/dict, got {type(call_params).__name__}"
        )

    try:
        safe_timeout = float(timeout) if timeout else 30.0
    except (TypeError, ValueError):
        safe_timeout = 30.0
    safe_timeout = max(1.0, min(safe_timeout, 300.0))

    try:
        result = _run_async(
            _cdp_call(endpoint, method, call_params, target_id, safe_timeout)
        )
    except asyncio.TimeoutError as exc:
        return tool_error(
            f"CDP call timed out after {safe_timeout}s: {exc}",
            method=method,
        )
    except TimeoutError as exc:
        return tool_error(str(exc), method=method)
    except RuntimeError as exc:
        return tool_error(str(exc), method=method)
    except WebSocketException as exc:
        return tool_error(
            f"WebSocket error talking to CDP at {endpoint}: {exc}. The "
            "browser may have disconnected — try '/browser connect' again.",
            method=method,
        )
    except OSError as exc:
        # A refused or unreachable TCP connection (e.g. Chrome was closed)
        # surfaces as OSError, not WebSocketException. Report it as a
        # connectivity problem rather than as an unexpected error. Timeouts
        # are OSError subclasses too but are handled by the branches above.
        return tool_error(
            f"Could not connect to CDP at {endpoint}: {exc}. The "
            "browser may have disconnected — try '/browser connect' again.",
            method=method,
        )
    except Exception as exc:  # pragma: no cover — unexpected
        logger.exception("browser_cdp unexpected error")
        return tool_error(
            f"Unexpected error: {type(exc).__name__}: {exc}",
            method=method,
        )

    payload: Dict[str, Any] = {
        "success": True,
        "method": method,
        "result": result,
    }
    if target_id:
        payload["target_id"] = target_id
    return json.dumps(payload, ensure_ascii=False)
# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------
# Tool schema passed to ``registry.register`` below. The description is built
# from adjacent string literals and embeds ``CDP_DOCS_URL``; only ``method``
# is a required parameter.
BROWSER_CDP_SCHEMA: Dict[str, Any] = {
    "name": "browser_cdp",
    "description": (
        "Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for "
        "browser operations not covered by browser_navigate, browser_click, "
        "browser_console, etc.\n\n"
        "**Requires a reachable CDP endpoint.** Available when the user has "
        "run '/browser connect' to attach to a running Chrome, or when "
        "'browser.cdp_url' is set in config.yaml. Not currently wired up for "
        "cloud backends (Browserbase, Browser Use, Firecrawl) — those expose "
        "CDP per session but live-session routing is a follow-up. Camofox is "
        "REST-only and will never support CDP. If the tool is in your toolset "
        "at all, a CDP endpoint is already reachable.\n\n"
        f"**CDP method reference:** {CDP_DOCS_URL} — use web_extract on a "
        "method's URL (e.g. '/tot/Page/#method-handleJavaScriptDialog') "
        "to look up parameters and return shape.\n\n"
        "**Common patterns:**\n"
        "- List tabs: method='Target.getTargets', params={}\n"
        "- Handle a native JS dialog: method='Page.handleJavaScriptDialog', "
        "params={'accept': true, 'promptText': ''}, target_id=<tabId>\n"
        # NOTE(review): this example omits target_id, while the usage rules
        # below list Network.* as tab-scoped — confirm that a browser-level
        # connection accepts Network.getAllCookies.
        "- Get all cookies: method='Network.getAllCookies', params={}\n"
        "- Eval in a specific tab: method='Runtime.evaluate', "
        "params={'expression': '...', 'returnByValue': true}, "
        "target_id=<tabId>\n"
        "- Set viewport for a tab: method='Emulation.setDeviceMetricsOverride', "
        "params={'width': 1280, 'height': 720, 'deviceScaleFactor': 1, "
        "'mobile': false}, target_id=<tabId>\n\n"
        "**Usage rules:**\n"
        "- Browser-level methods (Target.*, Browser.*, Storage.*): omit "
        "target_id.\n"
        "- Page-level methods (Page.*, Runtime.*, DOM.*, Emulation.*, "
        "Network.* scoped to a tab): pass target_id from Target.getTargets.\n"
        "- Each call is independent — sessions and event subscriptions do "
        "not persist between calls. For stateful workflows, prefer the "
        "dedicated browser tools."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "method": {
                "type": "string",
                "description": (
                    "CDP method name, e.g. 'Target.getTargets', "
                    "'Runtime.evaluate', 'Page.handleJavaScriptDialog'."
                ),
            },
            # Passed through to the CDP call as-is; any keys are allowed.
            "params": {
                "type": "object",
                "description": (
                    "Method-specific parameters as a JSON object. Omit or "
                    "pass {} for methods that take no parameters."
                ),
                "additionalProperties": True,
            },
            "target_id": {
                "type": "string",
                "description": (
                    "Optional. Target/tab ID from Target.getTargets result "
                    "(each entry's 'targetId'). Required for page-level "
                    "methods; must be omitted for browser-level methods."
                ),
            },
            # browser_cdp clamps the effective value to the range 1-300 seconds.
            "timeout": {
                "type": "number",
                "description": (
                    "Timeout in seconds (default 30, max 300)."
                ),
                "default": 30,
            },
        },
        "required": ["method"],
    },
}
def _browser_cdp_check() -> bool:
    """Report whether ``browser_cdp`` should be offered right now.

    The tool is only offered when the Python side can reach a CDP endpoint —
    meaning a static URL is set via ``/browser connect``
    (``BROWSER_CDP_URL``) or ``browser.cdp_url`` in ``config.yaml`` — and
    the general browser requirements are met.

    Backends that do *not* currently expose CDP to us — Camofox (REST-only),
    the default local agent-browser mode (Playwright hides its internal CDP
    port), and cloud providers whose per-session ``cdp_url`` is not yet
    surfaced — are gated out so the model doesn't see a tool that would
    reliably fail. Cloud-provider CDP routing is a follow-up.

    This lives in a thin wrapper so the registration statement can stay at
    module top level (the tool-discovery AST scan only picks up top-level
    ``registry.register(...)`` calls).
    """
    try:
        from tools.browser_tool import (  # type: ignore[import-not-found]
            _get_cdp_override,
            check_browser_requirements,
        )
    except ImportError as exc:  # pragma: no cover — defensive
        logger.debug("browser_cdp check: browser_tool import failed: %s", exc)
        return False

    if check_browser_requirements():
        return bool(_get_cdp_override())
    return False
# Register at module top level: per the note in ``_browser_cdp_check``, the
# tool-discovery AST scan only picks up top-level ``registry.register(...)``
# calls. The handler adapts the registry's ``(args, **kw)`` calling
# convention to ``browser_cdp``'s keyword parameters.
registry.register(
    name="browser_cdp",
    toolset="browser",
    schema=BROWSER_CDP_SCHEMA,
    handler=lambda args, **kw: browser_cdp(
        method=args.get("method", ""),
        params=args.get("params"),
        target_id=args.get("target_id"),
        timeout=args.get("timeout", 30.0),
        task_id=kw.get("task_id"),
    ),
    # Hides the tool unless a CDP endpoint is currently resolvable.
    check_fn=_browser_cdp_check,
    emoji="🧪",
)

View file

@ -43,7 +43,7 @@ _HERMES_CORE_TOOLS = [
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_get_images",
"browser_vision", "browser_console",
"browser_vision", "browser_console", "browser_cdp",
# Text-to-speech
"text_to_speech",
# Planning & memory
@ -115,7 +115,7 @@ TOOLSETS = {
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_get_images",
"browser_vision", "browser_console", "web_search"
"browser_vision", "browser_console", "browser_cdp", "web_search"
],
"includes": []
},
@ -249,7 +249,7 @@ TOOLSETS = {
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_get_images",
"browser_vision", "browser_console",
"browser_vision", "browser_console", "browser_cdp",
"todo", "memory",
"session_search",
"execute_code", "delegate_task",
@ -274,7 +274,7 @@ TOOLSETS = {
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_get_images",
"browser_vision", "browser_console",
"browser_vision", "browser_console", "browser_cdp",
# Planning & memory
"todo", "memory",
# Session history search

View file

@ -6,9 +6,9 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool
# Built-in Tools Reference
This page documents all 52 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
This page documents all 53 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
**Quick counts:** 10 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets.
**Quick counts:** 11 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets.
:::tip MCP Tools
In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration.
@ -19,6 +19,7 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server
| Tool | Description | Requires environment |
|------|-------------|----------------------|
| `browser_back` | Navigate back to the previous page in browser history. Requires browser_navigate to be called first. | — |
| `browser_cdp` | Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for browser operations not covered by browser_navigate, browser_click, browser_console, etc. Only available when a CDP endpoint is reachable at session start — via `/browser connect` or `browser.cdp_url` config. See https://chromedevtools.github.io/devtools-protocol/ | — |
| `browser_click` | Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first. | — |
| `browser_console` | Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requi… | — |
| `browser_get_images` | Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first. | — |

View file

@ -52,7 +52,7 @@ Or in-session:
| Toolset | Tools | Purpose |
|---------|-------|---------|
| `browser` | `browser_back`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. |
| `browser` | `browser_back`, `browser_cdp`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. `browser_cdp` is a raw CDP passthrough gated on a reachable CDP endpoint — it only appears when `/browser connect` is active or `browser.cdp_url` is set. |
| `clarify` | `clarify` | Ask the user a question when the agent needs clarification. |
| `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. |
| `cronjob` | `cronjob` | Schedule and manage recurring tasks. |

View file

@ -327,6 +327,36 @@ Check the browser console for any JavaScript errors
Use `clear=True` to clear the console after reading, so subsequent calls only show new messages.
### `browser_cdp`
Raw Chrome DevTools Protocol passthrough — the escape hatch for browser operations not covered by the other tools. Use for native dialog handling, iframe-scoped evaluation, cookie/network control, or any CDP verb the agent needs.
**Only available when a CDP endpoint is reachable at session start** — meaning `/browser connect` has attached to a running Chrome, or `browser.cdp_url` is set in `config.yaml`. The default local agent-browser mode, Camofox, and cloud providers (Browserbase, Browser Use, Firecrawl) do not currently expose CDP to this tool — cloud providers have per-session CDP URLs but live-session routing is a follow-up.
**CDP method reference:** https://chromedevtools.github.io/devtools-protocol/ — the agent can `web_extract` a specific method's page to look up parameters and return shape.
Common patterns:
```
# List tabs (browser-level, no target_id)
browser_cdp(method="Target.getTargets")
# Handle a native JS dialog on a tab
browser_cdp(method="Page.handleJavaScriptDialog",
params={"accept": true, "promptText": ""},
target_id="<tabId>")
# Evaluate JS in a specific tab
browser_cdp(method="Runtime.evaluate",
params={"expression": "document.title", "returnByValue": true},
target_id="<tabId>")
# Get all cookies
browser_cdp(method="Network.getAllCookies")
```
Browser-level methods (`Target.*`, `Browser.*`, `Storage.*`) omit `target_id`. Page-level methods (`Page.*`, `Runtime.*`, `DOM.*`, `Emulation.*`) require a `target_id` from `Target.getTargets`. Each call is independent — sessions do not persist between calls.
## Practical Examples
### Filling Out a Web Form