mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(browser): add browser_cdp raw DevTools Protocol passthrough (#12369)
Agents can now send arbitrary CDP commands to the browser. The tool is gated on a reachable CDP endpoint at session start — it only appears in the toolset when BROWSER_CDP_URL is set (from '/browser connect') or 'browser.cdp_url' is configured in config.yaml. Backends that don't currently expose CDP to the Python side (Camofox, default local agent-browser, cloud providers whose per-session cdp_url is not yet surfaced) do not see the tool at all. Tool schema description links to the CDP method reference at https://chromedevtools.github.io/devtools-protocol/ so the agent can web_extract specific method docs on demand. Stateless per call. Browser-level methods (Target.*, Browser.*, Storage.*) omit target_id. Page-level methods attach to the target with flatten=true and dispatch the method on the returned sessionId. Clean errors when the endpoint becomes unreachable mid-session or the URL isn't a WebSocket. Tests: 19 unit (mock CDP server + gate checks) + E2E against real headless Chrome (Target.getTargets, Browser.getVersion, Runtime.evaluate with target_id, Page.navigate + re-eval, bogus method, bogus target_id, missing endpoint) + E2E of the check_fn gate (tool hidden without CDP URL, visible with it, hidden again after unset).
This commit is contained in:
parent
d66414a844
commit
ce410521b3
6 changed files with 862 additions and 7 deletions
408
tests/tools/test_browser_cdp_tool.py
Normal file
408
tests/tools/test_browser_cdp_tool.py
Normal file
|
|
@ -0,0 +1,408 @@
|
|||
"""Unit tests for browser_cdp tool.
|
||||
|
||||
Uses a tiny in-process ``websockets`` server to simulate a CDP endpoint —
|
||||
gives real protocol coverage (connect, send, recv, close) without needing
|
||||
a real Chrome instance.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import pytest
|
||||
|
||||
import websockets
|
||||
from websockets.asyncio.server import serve
|
||||
|
||||
from tools import browser_cdp_tool
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# In-process CDP mock server
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _CDPServer:
    """A tiny CDP-over-WebSocket mock that runs on a background thread.

    Each client gets a greeting-free stream. Every inbound message is
    recorded and answered using the handler registered for its ``method``;
    if no handler is registered, a generic CDP error is returned.
    """

    def __init__(self) -> None:
        # method name -> callable(params, session_id)
        self._handlers: Dict[str, Any] = {}
        # Every inbound request, in arrival order (exposed via received()).
        self._received: List[Dict[str, Any]] = []
        self._loop: asyncio.AbstractEventLoop | None = None
        self._server: Any = None
        self._thread: threading.Thread | None = None
        # Exception raised while binding the server, if any.
        self._startup_error: BaseException | None = None
        self._host = "127.0.0.1"
        self._port = 0

    # --- handler registration --------------------------------------------

    def on(self, method: str, handler) -> None:
        """Register a handler ``handler(params, session_id) -> dict or Exception``."""
        self._handlers[method] = handler

    # --- request handling ------------------------------------------------

    def _reply_for(self, msg: Dict[str, Any]) -> Dict[str, Any]:
        """Record one inbound request and build the reply to send back.

        A handler may return a result dict, return an ``Exception`` instance,
        or raise; the latter two both become a CDP error reply with code -1.
        The request's ``sessionId`` (if any) is echoed on the reply.
        """
        call_id = msg.get("id")
        method = msg.get("method", "")
        params = msg.get("params", {}) or {}
        session_id = msg.get("sessionId")
        self._received.append(msg)

        handler = self._handlers.get(method)
        reply: Dict[str, Any]
        if handler is None:
            reply = {
                "id": call_id,
                "error": {
                    "code": -32601,
                    "message": f"No handler for {method}",
                },
            }
        else:
            try:
                result = handler(params, session_id)
                if isinstance(result, Exception):
                    raise result
                reply = {"id": call_id, "result": result}
            except Exception as exc:
                reply = {
                    "id": call_id,
                    "error": {"code": -1, "message": str(exc)},
                }
        if session_id:
            reply["sessionId"] = session_id
        return reply

    async def _handle_connection(self, ws) -> None:
        """Serve one client until it disconnects."""
        try:
            async for raw in ws:
                reply = self._reply_for(json.loads(raw))
                await ws.send(json.dumps(reply))
        except websockets.exceptions.ConnectionClosed:
            pass

    # --- lifecycle -------------------------------------------------------

    def start(self) -> str:
        """Start the server on an ephemeral port and return its ws:// URL.

        Raises:
            RuntimeError: if the server fails to bind (the original error is
                chained) or does not come up within 5 seconds.
        """
        ready = threading.Event()
        self._startup_error = None

        async def _serve() -> None:
            self._server = await serve(self._handle_connection, self._host, 0)
            sock = next(iter(self._server.sockets))
            self._port = sock.getsockname()[1]
            ready.set()
            await self._server.wait_closed()

        def _run() -> None:
            loop = asyncio.new_event_loop()
            self._loop = loop
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(_serve())
            except Exception as exc:
                if ready.is_set():
                    raise
                # Startup failed: hand the error to start() instead of
                # leaving the caller to wait out the full timeout.
                self._startup_error = exc
                ready.set()
            finally:
                loop.close()

        self._thread = threading.Thread(target=_run, daemon=True)
        self._thread.start()
        if not ready.wait(timeout=5.0):
            raise RuntimeError("CDP mock server failed to start within 5s")
        if self._startup_error is not None:
            raise RuntimeError(
                "CDP mock server failed to start"
            ) from self._startup_error
        return f"ws://{self._host}:{self._port}/devtools/browser/mock"

    def stop(self) -> None:
        """Close the server and wait briefly for the background thread."""
        loop, server = self._loop, self._server
        if loop and server and not loop.is_closed():
            # The server lives on the background loop; close it from there.
            loop.call_soon_threadsafe(server.close)
        if self._thread:
            self._thread.join(timeout=3.0)

    def received(self) -> List[Dict[str, Any]]:
        """Return a copy of every request received so far, in order."""
        return list(self._received)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def cdp_server(monkeypatch):
    """Start a CDP mock and route tool resolution to it.

    Yields the running ``_CDPServer``; the server is stopped on teardown.
    """
    server = _CDPServer()
    ws_url = server.start()
    try:
        # Patch inside the try so a failed patch still stops the server
        # thread that start() has already launched.
        monkeypatch.setattr(
            browser_cdp_tool, "_resolve_cdp_endpoint", lambda: ws_url
        )
        yield server
    finally:
        server.stop()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Input validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_missing_method_returns_error():
    """An empty method name is rejected and the docs link is attached."""
    payload = json.loads(browser_cdp_tool.browser_cdp(method=""))
    assert "error" in payload
    message = payload["error"]
    assert "method" in message.lower()
    assert payload.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL
|
||||
|
||||
|
||||
def test_non_string_method_returns_error():
    """A non-string ``method`` yields an error that names the field."""
    raw = browser_cdp_tool.browser_cdp(method=123)  # type: ignore[arg-type]
    outcome = json.loads(raw)
    assert "error" in outcome
    assert "method" in outcome["error"].lower()
|
||||
|
||||
|
||||
def test_non_dict_params_returns_error(monkeypatch):
    """``params`` that is not a dict is rejected before any connection."""
    # An endpoint must resolve, otherwise the call stops at the endpoint check.
    monkeypatch.setattr(
        browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "ws://localhost:9999"
    )
    raw = browser_cdp_tool.browser_cdp(
        method="Target.getTargets",
        params="not-a-dict",  # type: ignore[arg-type]
    )
    outcome = json.loads(raw)
    assert "error" in outcome
    lowered = outcome["error"].lower()
    assert any(word in lowered for word in ("object", "dict"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoint resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_no_endpoint_returns_helpful_error(monkeypatch):
    """With no endpoint, the error points at '/browser connect' and the docs."""
    monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "")
    raw = browser_cdp_tool.browser_cdp(method="Target.getTargets")
    outcome = json.loads(raw)
    assert "error" in outcome
    assert "/browser connect" in outcome["error"]
    assert outcome.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL
|
||||
|
||||
|
||||
def test_non_ws_endpoint_returns_error(monkeypatch):
    """An http:// endpoint is refused with a WebSocket-specific message."""
    http_url = "http://localhost:9222"
    monkeypatch.setattr(
        browser_cdp_tool, "_resolve_cdp_endpoint", lambda: http_url
    )
    outcome = json.loads(
        browser_cdp_tool.browser_cdp(method="Target.getTargets")
    )
    assert "error" in outcome
    assert "WebSocket" in outcome["error"]
|
||||
|
||||
|
||||
def test_websockets_missing_returns_error(monkeypatch):
    """When the websockets package is flagged absent, the error says so."""
    monkeypatch.setattr(browser_cdp_tool, "_WS_AVAILABLE", False)
    raw = browser_cdp_tool.browser_cdp(method="Target.getTargets")
    outcome = json.loads(raw)
    assert "error" in outcome
    assert "websockets" in outcome["error"].lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Happy-path: browser-level call
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_browser_level_success(cdp_server):
    """A browser-level call returns the result and sends exactly one request."""
    tabs = [
        {"targetId": "A", "type": "page", "title": "Tab 1", "url": "about:blank"},
        {"targetId": "B", "type": "page", "title": "Tab 2", "url": "https://a.test"},
    ]
    cdp_server.on("Target.getTargets", lambda params, sid: {"targetInfos": tabs})

    outcome = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets"))
    assert outcome["success"] is True
    assert outcome["method"] == "Target.getTargets"
    assert "target_id" not in outcome
    assert len(outcome["result"]["targetInfos"]) == 2

    # Verify the server actually received exactly one call (no extra traffic)
    requests = cdp_server.received()
    assert len(requests) == 1
    only_request = requests[0]
    assert only_request["method"] == "Target.getTargets"
    assert "sessionId" not in only_request
|
||||
|
||||
|
||||
def test_empty_params_sends_empty_object(cdp_server):
    """Omitting ``params`` puts an empty object on the wire."""
    cdp_server.on("Browser.getVersion", lambda params, sid: {"product": "Mock/1.0"})
    # Parsing the return value also checks that the tool emitted valid JSON.
    json.loads(browser_cdp_tool.browser_cdp(method="Browser.getVersion"))
    first_request = cdp_server.received()[0]
    assert first_request["params"] == {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Happy-path: target-attached call
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_target_attach_then_call(cdp_server):
    """With ``target_id`` the tool attaches first, then dispatches on the session."""

    def attach(params, sid):
        return {"sessionId": f"sess-{params['targetId']}"}

    def evaluate(params, sid):
        return {"result": {"type": "string", "value": f"evaluated[{sid}]"}}

    cdp_server.on("Target.attachToTarget", attach)
    cdp_server.on("Runtime.evaluate", evaluate)

    raw = browser_cdp_tool.browser_cdp(
        method="Runtime.evaluate",
        params={"expression": "document.title", "returnByValue": True},
        target_id="tab-A",
    )
    outcome = json.loads(raw)
    assert outcome["success"] is True
    assert outcome["target_id"] == "tab-A"
    assert outcome["result"]["result"]["value"] == "evaluated[sess-tab-A]"

    attach_request, eval_request = cdp_server.received()[:2]
    # First call: attach
    assert attach_request["method"] == "Target.attachToTarget"
    assert attach_request["params"] == {"targetId": "tab-A", "flatten": True}
    # Second call: dispatched method on the session
    assert eval_request["method"] == "Runtime.evaluate"
    assert eval_request["sessionId"] == "sess-tab-A"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CDP error responses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_cdp_method_error_returns_tool_error(cdp_server):
    """A CDP-level error reply is surfaced as a tool error naming the method."""
    # No handler registered -> server returns CDP error
    unknown_method = "NonExistent.method"
    outcome = json.loads(browser_cdp_tool.browser_cdp(method=unknown_method))
    assert "error" in outcome
    assert "CDP error" in outcome["error"]
    assert outcome.get("method") == unknown_method
|
||||
|
||||
|
||||
def test_attach_failure_returns_tool_error(cdp_server):
    """A failed attach is reported with the attach method in the message."""
    # Target.attachToTarget has no handler -> server errors on attach
    raw = browser_cdp_tool.browser_cdp(
        method="Runtime.evaluate",
        params={"expression": "1+1"},
        target_id="missing",
    )
    outcome = json.loads(raw)
    assert "error" in outcome
    assert "Target.attachToTarget" in outcome["error"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Timeouts
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_timeout_when_server_never_replies(cdp_server):
    """A call whose reply never arrives in time produces a timeout error."""
    release = threading.Event()

    def stalled(params, sid):
        # Hold the reply back until the test lets go. The handler runs on
        # the mock server's event loop, so releasing it promptly lets the
        # fixture shut the server down instead of waiting on a sleeping
        # thread. The wait is bounded so a failing test cannot wedge it.
        release.wait(timeout=10)
        return {}

    cdp_server.on("Page.slowMethod", stalled)
    try:
        # browser_cdp clamps timeouts to at least 1 second, so 0.5 behaves
        # as the shortest timeout the tool allows.
        result = json.loads(
            browser_cdp_tool.browser_cdp(method="Page.slowMethod", timeout=0.5)
        )
    finally:
        release.set()
    assert "error" in result
    assert "tim" in result["error"].lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Timeout clamping
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_timeout_clamped_above_max(cdp_server, monkeypatch):
    """An oversized timeout is clamped to 300 seconds and the call succeeds."""
    cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"})

    # Spy on the timeout handed to the real CDP call so the clamp itself is
    # checked, not merely that the call succeeds.
    seen_timeouts = []
    real_cdp_call = browser_cdp_tool._cdp_call

    async def spy(ws_url, method, params, target_id, timeout):
        seen_timeouts.append(timeout)
        return await real_cdp_call(ws_url, method, params, target_id, timeout)

    monkeypatch.setattr(browser_cdp_tool, "_cdp_call", spy)

    result = json.loads(
        browser_cdp_tool.browser_cdp(method="Browser.getVersion", timeout=10_000)
    )
    assert result["success"] is True
    assert seen_timeouts == [300.0]
|
||||
|
||||
|
||||
def test_invalid_timeout_falls_back_to_default(cdp_server, monkeypatch):
    """An unparseable timeout falls back to the 30 second default."""
    cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"})

    # Spy on the timeout handed to the real CDP call so the fallback value
    # is checked, not merely that the call succeeds.
    seen_timeouts = []
    real_cdp_call = browser_cdp_tool._cdp_call

    async def spy(ws_url, method, params, target_id, timeout):
        seen_timeouts.append(timeout)
        return await real_cdp_call(ws_url, method, params, target_id, timeout)

    monkeypatch.setattr(browser_cdp_tool, "_cdp_call", spy)

    result = json.loads(
        browser_cdp_tool.browser_cdp(method="Browser.getVersion", timeout="nope")  # type: ignore[arg-type]
    )
    assert result["success"] is True
    assert seen_timeouts == [30.0]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_registered_in_browser_toolset():
    """The tool is registered under the browser toolset with its schema."""
    from tools.registry import registry

    entry = registry.get_entry("browser_cdp")
    assert entry is not None
    assert entry.toolset == "browser"

    schema = entry.schema
    assert schema["name"] == "browser_cdp"
    assert schema["parameters"]["required"] == ["method"]

    description = schema["description"]
    assert "Chrome DevTools Protocol" in description
    assert browser_cdp_tool.CDP_DOCS_URL in description
|
||||
|
||||
|
||||
def test_dispatch_through_registry(cdp_server):
    """Dispatching by name through the registry reaches the CDP handler."""
    from tools.registry import registry

    cdp_server.on("Target.getTargets", lambda p, s: {"targetInfos": []})
    tool_args = {"method": "Target.getTargets"}
    outcome = json.loads(
        registry.dispatch("browser_cdp", tool_args, task_id="t1")
    )
    assert outcome["success"] is True
    assert outcome["method"] == "Target.getTargets"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# check_fn gating
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_check_fn_false_when_no_cdp_url(monkeypatch):
    """Gate closes when no CDP URL is set — even if the browser toolset is
    otherwise configured."""
    import tools.browser_tool as bt

    for attr_name, stub in (
        ("check_browser_requirements", lambda: True),
        ("_get_cdp_override", lambda: ""),
    ):
        monkeypatch.setattr(bt, attr_name, stub)
    gate_open = browser_cdp_tool._browser_cdp_check()
    assert gate_open is False
|
||||
|
||||
|
||||
def test_check_fn_true_when_cdp_url_set(monkeypatch):
    """Gate opens as soon as a CDP URL is resolvable."""
    import tools.browser_tool as bt

    cdp_url = "ws://localhost:9222/devtools/browser/x"
    monkeypatch.setattr(bt, "check_browser_requirements", lambda: True)
    monkeypatch.setattr(bt, "_get_cdp_override", lambda: cdp_url)
    gate_open = browser_cdp_tool._browser_cdp_check()
    assert gate_open is True
|
||||
|
||||
|
||||
def test_check_fn_false_when_browser_requirements_fail(monkeypatch):
    """Even with a CDP URL, gate closes if the overall browser toolset is
    unavailable (e.g. agent-browser not installed)."""
    import tools.browser_tool as bt

    cdp_url = "ws://localhost:9222/devtools/browser/x"
    monkeypatch.setattr(bt, "check_browser_requirements", lambda: False)
    monkeypatch.setattr(bt, "_get_cdp_override", lambda: cdp_url)
    gate_open = browser_cdp_tool._browser_cdp_check()
    assert gate_open is False
|
||||
416
tools/browser_cdp_tool.py
Normal file
416
tools/browser_cdp_tool.py
Normal file
|
|
@ -0,0 +1,416 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Raw Chrome DevTools Protocol (CDP) passthrough tool.
|
||||
|
||||
Exposes a single tool, ``browser_cdp``, that sends arbitrary CDP commands to
|
||||
the browser's DevTools WebSocket endpoint. Works when a CDP URL is
|
||||
configured — either via ``/browser connect`` (sets ``BROWSER_CDP_URL``) or
|
||||
``browser.cdp_url`` in ``config.yaml``. Cloud-provider sessions are not routed
to this tool yet, even where the provider exposes CDP per session.
|
||||
|
||||
This is the escape hatch for browser operations not covered by the main
|
||||
browser tool surface (``browser_navigate``, ``browser_click``,
|
||||
``browser_console``, etc.) — handling native dialogs, iframe-scoped
|
||||
evaluation, cookie/network control, low-level tab management, etc.
|
||||
|
||||
Method reference: https://chromedevtools.github.io/devtools-protocol/
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from tools.registry import registry, tool_error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
CDP_DOCS_URL = "https://chromedevtools.github.io/devtools-protocol/"
|
||||
|
||||
# ``websockets`` is a transitive dependency of hermes-agent (via fal_client
|
||||
# and firecrawl-py) and is already imported by gateway/platforms/feishu.py.
|
||||
# Wrap the import so a clean error surfaces if the package is ever absent.
|
||||
try:
|
||||
import websockets
|
||||
from websockets.exceptions import WebSocketException
|
||||
|
||||
_WS_AVAILABLE = True
|
||||
except ImportError:
|
||||
websockets = None # type: ignore[assignment]
|
||||
WebSocketException = Exception # type: ignore[assignment,misc]
|
||||
_WS_AVAILABLE = False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Async-from-sync bridge (matches the pattern in homeassistant_tool.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _run_async(coro):
    """Drive ``coro`` to completion from synchronous code and return its result.

    When no event loop is running in this thread the coroutine is run
    directly with ``asyncio.run``. When one is already running, the
    coroutine is run with ``asyncio.run`` on a one-off worker thread and
    this call blocks until that thread finishes.
    """
    try:
        active_loop = asyncio.get_running_loop()
    except RuntimeError:
        active_loop = None

    if not (active_loop and active_loop.is_running()):
        return asyncio.run(coro)

    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(asyncio.run, coro).result()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoint resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _resolve_cdp_endpoint() -> str:
    """Return the CDP WebSocket URL with whitespace stripped, or ``""``.

    The lookup is delegated to ``tools.browser_tool._get_cdp_override`` so
    precedence stays consistent with the rest of the browser tool surface:

    1. ``BROWSER_CDP_URL`` env var (live override from ``/browser connect``)
    2. ``browser.cdp_url`` in ``config.yaml``

    Any failure while importing or calling the resolver is logged at debug
    level and reported as "no endpoint" (empty string).
    """
    try:
        from tools.browser_tool import _get_cdp_override  # type: ignore[import-not-found]

        override = _get_cdp_override()
        return override.strip() if override else ""
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("browser_cdp: failed to resolve CDP endpoint: %s", exc)
        return ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core CDP call
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _await_cdp_reply(
    ws,
    reply_id: int,
    timeout: float,
    timeout_message: str,
) -> Dict[str, Any]:
    """Read frames from ``ws`` until the reply with ``id == reply_id`` arrives.

    Events (frames without a matching ``id``), replies to other calls, and
    frames that are not JSON objects are skipped.

    Raises:
        TimeoutError: with ``timeout_message`` once ``timeout`` seconds have
            elapsed between frames; ``asyncio.wait_for`` raises its own
            timeout error if the deadline passes while waiting on a frame.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            raise TimeoutError(timeout_message)
        raw = await asyncio.wait_for(ws.recv(), timeout=remaining)
        msg = json.loads(raw)
        if isinstance(msg, dict) and msg.get("id") == reply_id:
            return msg


async def _cdp_call(
    ws_url: str,
    method: str,
    params: Dict[str, Any],
    target_id: Optional[str],
    timeout: float,
) -> Dict[str, Any]:
    """Make a single CDP call, optionally attaching to a target first.

    When ``target_id`` is provided, we call ``Target.attachToTarget`` with
    ``flatten=True`` to multiplex a page-level session over the same
    browser-level WebSocket, then send ``method`` with that ``sessionId``.
    When ``target_id`` is None, ``method`` is sent at browser level — which
    works for ``Target.*``, ``Browser.*``, ``Storage.*`` and a few other
    globally-scoped domains.

    ``timeout`` applies separately to opening the connection, to the attach
    step, and to the method call itself.

    Returns:
        The ``result`` object of the CDP reply (``{}`` if the reply has none).

    Raises:
        RuntimeError: if ``websockets`` is unavailable, the attach fails or
            returns no ``sessionId``, or the browser replies with a CDP error.
        TimeoutError: if the attach or the call does not complete in time.
    """
    # Explicit check instead of ``assert`` so the guard survives ``python -O``.
    if websockets is None:
        raise RuntimeError("The 'websockets' Python package is not installed")

    async with websockets.connect(
        ws_url,
        max_size=None,  # CDP responses (e.g. DOM.getDocument) can be large
        open_timeout=timeout,
        close_timeout=5,
        ping_interval=None,  # CDP server doesn't expect pings
    ) as ws:
        next_id = 1
        session_id: Optional[str] = None

        # --- Step 1: attach to target if requested ---
        if target_id:
            attach_id = next_id
            next_id += 1
            await ws.send(
                json.dumps(
                    {
                        "id": attach_id,
                        "method": "Target.attachToTarget",
                        "params": {"targetId": target_id, "flatten": True},
                    }
                )
            )
            attach_reply = await _await_cdp_reply(
                ws,
                attach_id,
                timeout,
                f"Timed out attaching to target {target_id}",
            )
            if "error" in attach_reply:
                raise RuntimeError(
                    f"Target.attachToTarget failed: {attach_reply['error']}"
                )
            # ``or {}`` tolerates an explicit ``"result": null`` in the reply.
            session_id = (attach_reply.get("result") or {}).get("sessionId")
            if not session_id:
                raise RuntimeError(
                    "Target.attachToTarget did not return a sessionId"
                )

        # --- Step 2: dispatch the real method ---
        call_id = next_id
        req: Dict[str, Any] = {
            "id": call_id,
            "method": method,
            "params": params or {},
        }
        if session_id:
            req["sessionId"] = session_id
        await ws.send(json.dumps(req))

        reply = await _await_cdp_reply(
            ws,
            call_id,
            timeout,
            f"Timed out waiting for response to {method}",
        )
        if "error" in reply:
            raise RuntimeError(f"CDP error: {reply['error']}")
        return reply.get("result", {})
|
||||
# Ignore events / out-of-order responses
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public tool function
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def browser_cdp(
    method: str,
    params: Optional[Dict[str, Any]] = None,
    target_id: Optional[str] = None,
    timeout: float = 30.0,
    task_id: Optional[str] = None,
) -> str:
    """Send a raw CDP command. See ``CDP_DOCS_URL`` for method documentation.

    Args:
        method: CDP method name, e.g. ``"Target.getTargets"``.
        params: Method-specific parameters; defaults to ``{}``.
        target_id: Optional target/tab ID for page-level methods. When set,
            we first attach to the target (``flatten=True``) and send
            ``method`` with the resulting ``sessionId``.
        timeout: Seconds to wait for the call to complete. Values that are
            missing or unparseable fall back to 30; the result is clamped
            to the range 1–300.
        task_id: Unused (tool is stateless) — accepted for uniformity with
            other browser tools.

    Returns:
        JSON string ``{"success": True, "method": ..., "result": {...}}`` on
        success (plus ``target_id`` when one was given), or the
        ``tool_error`` payload on failure. This function does not raise;
        every failure is reported through the returned string.
    """
    del task_id  # unused — stateless

    if not method or not isinstance(method, str):
        return tool_error(
            "'method' is required (e.g. 'Target.getTargets')",
            cdp_docs=CDP_DOCS_URL,
        )

    if not _WS_AVAILABLE:
        return tool_error(
            "The 'websockets' Python package is required but not installed. "
            "Install it with: pip install websockets"
        )

    endpoint = _resolve_cdp_endpoint()
    if not endpoint:
        return tool_error(
            "No CDP endpoint is available. Run '/browser connect' to attach "
            "to a running Chrome, or set 'browser.cdp_url' in config.yaml. "
            "The Camofox backend is REST-only and does not expose CDP.",
            cdp_docs=CDP_DOCS_URL,
        )

    if not endpoint.startswith(("ws://", "wss://")):
        return tool_error(
            f"CDP endpoint is not a WebSocket URL: {endpoint!r}. "
            "Expected ws://... or wss://... — the /browser connect "
            "resolver should have rewritten this. Check that Chrome is "
            "actually listening on the debug port."
        )

    call_params: Dict[str, Any] = params or {}
    if not isinstance(call_params, dict):
        return tool_error(
            f"'params' must be an object/dict, got {type(call_params).__name__}"
        )

    try:
        safe_timeout = float(timeout) if timeout else 30.0
    except (TypeError, ValueError):
        safe_timeout = 30.0
    safe_timeout = max(1.0, min(safe_timeout, 300.0))

    try:
        result = _run_async(
            _cdp_call(endpoint, method, call_params, target_id, safe_timeout)
        )
    except (asyncio.TimeoutError, TimeoutError) as exc:
        # asyncio.TimeoutError is only an alias of TimeoutError on newer
        # Pythons; handling both in one branch keeps the wording identical
        # on every version.
        detail = f": {exc}" if str(exc) else ""
        return tool_error(
            f"CDP call timed out after {safe_timeout}s{detail}",
            method=method,
        )
    except RuntimeError as exc:
        return tool_error(str(exc), method=method)
    except OSError as exc:
        # Refused or reset connections and DNS failures are OSErrors, not
        # WebSocketExceptions. Report them as an unreachable endpoint rather
        # than as an unexpected error with a logged traceback. This branch
        # must stay after the timeout branch (TimeoutError is an OSError).
        return tool_error(
            f"Could not connect to CDP endpoint at {endpoint}: {exc}. The "
            "browser may have exited or the debug port may be closed — try "
            "'/browser connect' again.",
            method=method,
        )
    except WebSocketException as exc:
        return tool_error(
            f"WebSocket error talking to CDP at {endpoint}: {exc}. The "
            "browser may have disconnected — try '/browser connect' again.",
            method=method,
        )
    except Exception as exc:  # pragma: no cover — unexpected
        logger.exception("browser_cdp unexpected error")
        return tool_error(
            f"Unexpected error: {type(exc).__name__}: {exc}",
            method=method,
        )

    payload: Dict[str, Any] = {
        "success": True,
        "method": method,
        "result": result,
    }
    if target_id:
        payload["target_id"] = target_id
    return json.dumps(payload, ensure_ascii=False)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Tool schema handed to the registry. The description doubles as the usage
# guide for the tool: availability, where to find method docs, common call
# patterns, and the rules for when ``target_id`` is needed.
BROWSER_CDP_SCHEMA: Dict[str, Any] = {
    "name": "browser_cdp",
    "description": (
        "Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for "
        "browser operations not covered by browser_navigate, browser_click, "
        "browser_console, etc.\n\n"
        "**Requires a reachable CDP endpoint.** Available when the user has "
        "run '/browser connect' to attach to a running Chrome, or when "
        "'browser.cdp_url' is set in config.yaml. Not currently wired up for "
        "cloud backends (Browserbase, Browser Use, Firecrawl) — those expose "
        "CDP per session but live-session routing is a follow-up. Camofox is "
        "REST-only and will never support CDP. If the tool is in your toolset "
        "at all, a CDP endpoint is already reachable.\n\n"
        f"**CDP method reference:** {CDP_DOCS_URL} — use web_extract on a "
        "method's URL (e.g. '/tot/Page/#method-handleJavaScriptDialog') "
        "to look up parameters and return shape.\n\n"
        "**Common patterns:**\n"
        "- List tabs: method='Target.getTargets', params={}\n"
        "- Handle a native JS dialog: method='Page.handleJavaScriptDialog', "
        "params={'accept': true, 'promptText': ''}, target_id=<tabId>\n"
        # Storage.getCookies is a browser-level method, so this example is
        # consistent with the "omit target_id" rule below; Network.* methods
        # are listed there as tab-scoped.
        "- Get all cookies: method='Storage.getCookies', params={}\n"
        "- Eval in a specific tab: method='Runtime.evaluate', "
        "params={'expression': '...', 'returnByValue': true}, "
        "target_id=<tabId>\n"
        "- Set viewport for a tab: method='Emulation.setDeviceMetricsOverride', "
        "params={'width': 1280, 'height': 720, 'deviceScaleFactor': 1, "
        "'mobile': false}, target_id=<tabId>\n\n"
        "**Usage rules:**\n"
        "- Browser-level methods (Target.*, Browser.*, Storage.*): omit "
        "target_id.\n"
        "- Page-level methods (Page.*, Runtime.*, DOM.*, Emulation.*, "
        "Network.* scoped to a tab): pass target_id from Target.getTargets.\n"
        "- Each call is independent — sessions and event subscriptions do "
        "not persist between calls. For stateful workflows, prefer the "
        "dedicated browser tools."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "method": {
                "type": "string",
                "description": (
                    "CDP method name, e.g. 'Target.getTargets', "
                    "'Runtime.evaluate', 'Page.handleJavaScriptDialog'."
                ),
            },
            "params": {
                "type": "object",
                "description": (
                    "Method-specific parameters as a JSON object. Omit or "
                    "pass {} for methods that take no parameters."
                ),
                "additionalProperties": True,
            },
            "target_id": {
                "type": "string",
                "description": (
                    "Optional. Target/tab ID from Target.getTargets result "
                    "(each entry's 'targetId'). Required for page-level "
                    "methods; must be omitted for browser-level methods."
                ),
            },
            "timeout": {
                "type": "number",
                "description": (
                    "Timeout in seconds (default 30, max 300)."
                ),
                "default": 30,
            },
        },
        "required": ["method"],
    },
}
|
||||
|
||||
|
||||
def _browser_cdp_check() -> bool:
    """Availability check for browser_cdp.

    The tool is only offered when the Python side can actually reach a CDP
    endpoint right now — meaning a static URL is set via ``/browser connect``
    (``BROWSER_CDP_URL``) or ``browser.cdp_url`` in ``config.yaml``.

    Backends that do *not* currently expose CDP to us — Camofox (REST-only),
    the default local agent-browser mode (Playwright hides its internal CDP
    port), and cloud providers whose per-session ``cdp_url`` is not yet
    surfaced — are gated out so the model doesn't see a tool that would
    reliably fail. Cloud-provider CDP routing is a follow-up.

    The endpoint is looked up through ``_resolve_cdp_endpoint`` — the same
    resolver ``browser_cdp`` uses — so the gate and the tool always agree on
    whether an endpoint exists (a whitespace-only URL or a failing resolver
    closes the gate instead of exposing a tool that cannot connect).

    Kept in a thin wrapper so the registration statement stays at module top
    level (the tool-discovery AST scan only picks up top-level
    ``registry.register(...)`` calls).

    Returns:
        True only when the browser toolset requirements are met and a
        non-empty CDP endpoint resolves.
    """
    try:
        from tools.browser_tool import (  # type: ignore[import-not-found]
            check_browser_requirements,
        )
    except ImportError as exc:  # pragma: no cover — defensive
        logger.debug("browser_cdp check: browser_tool import failed: %s", exc)
        return False
    if not check_browser_requirements():
        return False
    return bool(_resolve_cdp_endpoint())
|
||||
|
||||
|
||||
def _browser_cdp_handler(args, **kw):
    """Translate a registry dispatch into a ``browser_cdp`` call.

    ``args`` is the tool-call argument mapping. ``method`` falls back to an
    empty string and ``timeout`` to 30.0 when absent; ``params`` and
    ``target_id`` are passed through as ``None`` when not supplied.
    ``task_id`` is taken from the extra keyword arguments, if present.
    """
    return browser_cdp(
        method=args.get("method", ""),
        params=args.get("params"),
        target_id=args.get("target_id"),
        timeout=args.get("timeout", 30.0),
        task_id=kw.get("task_id"),
    )


# Keep this call as a top-level statement: the tool-discovery AST scan only
# picks up module-level ``registry.register(...)`` calls.
registry.register(
    name="browser_cdp",
    toolset="browser",
    schema=BROWSER_CDP_SCHEMA,
    handler=_browser_cdp_handler,
    check_fn=_browser_cdp_check,
    emoji="🧪",
)
|
||||
|
|
@ -43,7 +43,7 @@ _HERMES_CORE_TOOLS = [
|
|||
"browser_navigate", "browser_snapshot", "browser_click",
|
||||
"browser_type", "browser_scroll", "browser_back",
|
||||
"browser_press", "browser_get_images",
|
||||
"browser_vision", "browser_console",
|
||||
"browser_vision", "browser_console", "browser_cdp",
|
||||
# Text-to-speech
|
||||
"text_to_speech",
|
||||
# Planning & memory
|
||||
|
|
@ -115,7 +115,7 @@ TOOLSETS = {
|
|||
"browser_navigate", "browser_snapshot", "browser_click",
|
||||
"browser_type", "browser_scroll", "browser_back",
|
||||
"browser_press", "browser_get_images",
|
||||
"browser_vision", "browser_console", "web_search"
|
||||
"browser_vision", "browser_console", "browser_cdp", "web_search"
|
||||
],
|
||||
"includes": []
|
||||
},
|
||||
|
|
@ -249,7 +249,7 @@ TOOLSETS = {
|
|||
"browser_navigate", "browser_snapshot", "browser_click",
|
||||
"browser_type", "browser_scroll", "browser_back",
|
||||
"browser_press", "browser_get_images",
|
||||
"browser_vision", "browser_console",
|
||||
"browser_vision", "browser_console", "browser_cdp",
|
||||
"todo", "memory",
|
||||
"session_search",
|
||||
"execute_code", "delegate_task",
|
||||
|
|
@ -274,7 +274,7 @@ TOOLSETS = {
|
|||
"browser_navigate", "browser_snapshot", "browser_click",
|
||||
"browser_type", "browser_scroll", "browser_back",
|
||||
"browser_press", "browser_get_images",
|
||||
"browser_vision", "browser_console",
|
||||
"browser_vision", "browser_console", "browser_cdp",
|
||||
# Planning & memory
|
||||
"todo", "memory",
|
||||
# Session history search
|
||||
|
|
|
|||
|
|
@ -6,9 +6,9 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool
|
|||
|
||||
# Built-in Tools Reference
|
||||
|
||||
This page documents all 52 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
|
||||
This page documents all 53 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
|
||||
|
||||
**Quick counts:** 10 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets.
|
||||
**Quick counts:** 11 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets.
|
||||
|
||||
:::tip MCP Tools
|
||||
In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration.
|
||||
|
|
@ -19,6 +19,7 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server
|
|||
| Tool | Description | Requires environment |
|
||||
|------|-------------|----------------------|
|
||||
| `browser_back` | Navigate back to the previous page in browser history. Requires browser_navigate to be called first. | — |
|
||||
| `browser_cdp` | Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for browser operations not covered by browser_navigate, browser_click, browser_console, etc. Only available when a CDP endpoint is reachable at session start — via `/browser connect` or `browser.cdp_url` config. See https://chromedevtools.github.io/devtools-protocol/ | — |
|
||||
| `browser_click` | Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first. | — |
|
||||
| `browser_console` | Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requi… | — |
|
||||
| `browser_get_images` | Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first. | — |
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ Or in-session:
|
|||
|
||||
| Toolset | Tools | Purpose |
|
||||
|---------|-------|---------|
|
||||
| `browser` | `browser_back`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. |
|
||||
| `browser` | `browser_back`, `browser_cdp`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. `browser_cdp` is a raw CDP passthrough gated on a reachable CDP endpoint — it only appears when `/browser connect` is active or `browser.cdp_url` is set. |
|
||||
| `clarify` | `clarify` | Ask the user a question when the agent needs clarification. |
|
||||
| `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. |
|
||||
| `cronjob` | `cronjob` | Schedule and manage recurring tasks. |
|
||||
|
|
|
|||
|
|
@ -327,6 +327,36 @@ Check the browser console for any JavaScript errors
|
|||
|
||||
Use `clear=True` to clear the console after reading, so subsequent calls only show new messages.
|
||||
|
||||
### `browser_cdp`
|
||||
|
||||
Raw Chrome DevTools Protocol passthrough — the escape hatch for browser operations not covered by the other tools. Use for native dialog handling, iframe-scoped evaluation, cookie/network control, or any CDP verb the agent needs.
|
||||
|
||||
**Only available when a CDP endpoint is reachable at session start** — meaning `/browser connect` has attached to a running Chrome, or `browser.cdp_url` is set in `config.yaml`. The default local agent-browser mode, Camofox, and cloud providers (Browserbase, Browser Use, Firecrawl) do not currently expose CDP to this tool — cloud providers have per-session CDP URLs but live-session routing is a follow-up.
|
||||
|
||||
**CDP method reference:** https://chromedevtools.github.io/devtools-protocol/ — the agent can `web_extract` a specific method's page to look up parameters and return shape.
|
||||
|
||||
Common patterns:
|
||||
|
||||
```
|
||||
# List tabs (browser-level, no target_id)
|
||||
browser_cdp(method="Target.getTargets")
|
||||
|
||||
# Handle a native JS dialog on a tab
|
||||
browser_cdp(method="Page.handleJavaScriptDialog",
|
||||
params={"accept": true, "promptText": ""},
|
||||
target_id="<tabId>")
|
||||
|
||||
# Evaluate JS in a specific tab
|
||||
browser_cdp(method="Runtime.evaluate",
|
||||
params={"expression": "document.title", "returnByValue": true},
|
||||
target_id="<tabId>")
|
||||
|
||||
# Get all cookies
|
||||
browser_cdp(method="Storage.getCookies")
|
||||
```
|
||||
|
||||
Browser-level methods (`Target.*`, `Browser.*`, `Storage.*`) omit `target_id`. Page-level methods (`Page.*`, `Runtime.*`, `DOM.*`, `Emulation.*`) require a `target_id` from `Target.getTargets`. Each call is independent — sessions do not persist between calls.
|
||||
|
||||
## Practical Examples
|
||||
|
||||
### Filling Out a Web Form
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue