mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Ergonomic wrapper over CDP's Page.handleJavaScriptDialog that accepts or dismisses alert/confirm/prompt/beforeunload dialogs blocking a page. Unsticks pages whose JS thread is frozen by an unhandled dialog — symptom is that browser_snapshot, browser_console, browser_click etc. start hanging or erroring. - action='accept'|'dismiss' required; prompt_text optional for prompt() - target_id auto-resolves when exactly one page tab is open; with multiple page tabs, errors with the tab list so the agent picks one - Shares browser_cdp's check_fn gate — only appears when CDP is reachable (/browser connect or browser.cdp_url in config). Hidden otherwise so backends that can't use it don't see it. - Safe as a probe: CDP returns a clean 'No dialog is showing' error when nothing's pending, which we pass through verbatim Dialog detection (knowing a dialog is open without being told) is NOT included — it requires persistent CDP subscriptions per session, a larger architectural change. Documented as a follow-up; agents infer from symptoms and use this tool to recover. Tests: 11 new unit tests against mock CDP server covering the wrapper (action validation, auto-resolve with 0/1/multiple page targets, explicit target_id accept/dismiss flow, prompt_text passthrough, shared gate with browser_cdp, registry dispatch). E2E probe case against real headless Chrome passes. Positive-case real-Chrome E2E is blocked by Chromium's headless auto-dismiss behavior when no persistent listener is attached — unit tests exercise the exact CDP protocol we send, so the handling path is protocol-verified; headful real-browser usage (the actual /browser connect case) keeps dialogs alive via the Chrome UI.
657 lines
24 KiB
Python
657 lines
24 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Chrome DevTools Protocol (CDP) tools.
|
|
|
|
Exposes two tools that share the same CDP endpoint and availability gate:
|
|
|
|
* ``browser_cdp`` — raw CDP passthrough for arbitrary commands. Escape
|
|
hatch for anything not covered by the wrapped browser tools.
|
|
* ``browser_dialog`` — ergonomic wrapper over ``Page.handleJavaScriptDialog``
|
|
that accepts/dismisses a native JS dialog (alert/confirm/prompt/
|
|
beforeunload) blocking the page. Auto-resolves ``target_id`` when
|
|
exactly one page tab is open.
|
|
|
|
Both tools are only registered when a CDP endpoint is actually reachable
|
|
from Python at session start — meaning ``/browser connect`` is active or
|
|
``browser.cdp_url`` is set in ``config.yaml``. Backends that don't
|
|
currently expose CDP (Camofox, default local agent-browser, cloud
|
|
providers whose per-session ``cdp_url`` isn't surfaced) don't see these
|
|
tools at all.
|
|
|
|
CDP method reference: https://chromedevtools.github.io/devtools-protocol/
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import os
|
|
from typing import Any, Dict, Optional
|
|
|
|
from tools.registry import registry, tool_error
|
|
|
|
# Module-scoped logger shared by both CDP tools in this file.
logger = logging.getLogger(__name__)

# Root of the official CDP documentation; surfaced in schemas and error hints.
CDP_DOCS_URL = "https://chromedevtools.github.io/devtools-protocol/"

# ``websockets`` reaches hermes-agent transitively (through fal_client and
# firecrawl-py) and gateway/platforms/feishu.py imports it as well. The import
# is still guarded so that a missing package produces a clean tool error
# instead of breaking module import.
try:
    import websockets
    from websockets.exceptions import WebSocketException
except ImportError:
    # Placeholders keep every later reference to these names valid.
    websockets = None  # type: ignore[assignment]
    WebSocketException = Exception  # type: ignore[assignment,misc]
    _WS_AVAILABLE = False
else:
    _WS_AVAILABLE = True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Async-from-sync bridge (matches the pattern in homeassistant_tool.py)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _run_async(coro):
    """Drive ``coro`` to completion from synchronous code and return its result.

    When the calling thread has no running event loop the coroutine is run
    directly with ``asyncio.run``. When a loop is already running in this
    thread, ``asyncio.run`` is instead executed on a one-off worker thread and
    the caller blocks until that thread delivers the result.
    """
    try:
        active_loop = asyncio.get_running_loop()
    except RuntimeError:
        # Raised by get_running_loop() when this thread has no running loop.
        active_loop = None

    if active_loop is not None and active_loop.is_running():
        from concurrent.futures import ThreadPoolExecutor

        # asyncio.run() refuses to nest inside a running loop, so give the
        # coroutine a fresh loop on a separate thread.
        with ThreadPoolExecutor(max_workers=1) as executor:
            return executor.submit(asyncio.run, coro).result()

    return asyncio.run(coro)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Endpoint resolution
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _resolve_cdp_endpoint() -> str:
    """Look up the CDP endpoint URL, returning ``""`` when none is available.

    The lookup is delegated to ``tools.browser_tool._get_cdp_override`` so
    that this module follows the same precedence as the other browser tools:

    1. ``BROWSER_CDP_URL`` env var (live override from ``/browser connect``)
    2. ``browser.cdp_url`` in ``config.yaml``

    The value is stripped of surrounding whitespace. Any exception raised by
    the import or the lookup is logged at debug level and treated as "no
    endpoint".
    """
    try:
        from tools.browser_tool import _get_cdp_override  # type: ignore[import-not-found]

        override = _get_cdp_override()
        endpoint = override.strip() if override else ""
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("browser_cdp: failed to resolve CDP endpoint: %s", exc)
        endpoint = ""
    return endpoint
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Core CDP call
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
async def _cdp_call(
    ws_url: str,
    method: str,
    params: Dict[str, Any],
    target_id: Optional[str],
    timeout: float,
) -> Dict[str, Any]:
    """Make a single CDP call, optionally attaching to a target first.

    When ``target_id`` is provided, ``Target.attachToTarget`` is sent with
    ``flatten=True`` to multiplex a page-level session over the same
    browser-level WebSocket, and ``method`` is then sent with the returned
    ``sessionId``. When ``target_id`` is falsy, ``method`` is sent at browser
    level — which works for ``Target.*``, ``Browser.*``, ``Storage.*`` and a
    few other globally-scoped domains.

    Args:
        ws_url: WebSocket URL of the CDP endpoint.
        method: CDP method name to invoke.
        params: Parameters for ``method``; a falsy value is sent as ``{}``.
        target_id: Target to attach to before the call, or ``None``.
        timeout: Seconds allowed for opening the connection, and separately
            for each of the attach and call round-trips.

    Returns:
        The ``result`` member of the CDP response (``{}`` when absent).

    Raises:
        RuntimeError: ``websockets`` is unavailable, the attach step failed
            or returned no ``sessionId``, or CDP answered with an error.
        TimeoutError: A round-trip deadline passed (``asyncio.wait_for`` may
            raise ``asyncio.TimeoutError`` for the same condition).
    """
    # An explicit check rather than ``assert``: assertions vanish under
    # ``python -O``, and callers already translate RuntimeError into a tool
    # error. Normally unreachable because call-sites test _WS_AVAILABLE.
    if websockets is None:
        raise RuntimeError(
            "The 'websockets' Python package is required but not installed."
        )

    async with websockets.connect(
        ws_url,
        max_size=None,  # CDP responses (e.g. DOM.getDocument) can be large
        open_timeout=timeout,
        close_timeout=5,
        ping_interval=None,  # CDP server doesn't expect pings
    ) as ws:
        next_id = 1
        session_id: Optional[str] = None

        # --- Step 1: attach to target if requested ---
        if target_id:
            attach_id = next_id
            next_id += 1
            await ws.send(
                json.dumps(
                    {
                        "id": attach_id,
                        "method": "Target.attachToTarget",
                        "params": {"targetId": target_id, "flatten": True},
                    }
                )
            )
            attach_reply = await _await_cdp_reply(
                ws,
                attach_id,
                timeout,
                f"Timed out attaching to target {target_id}",
            )
            if "error" in attach_reply:
                raise RuntimeError(
                    f"Target.attachToTarget failed: {attach_reply['error']}"
                )
            session_id = attach_reply.get("result", {}).get("sessionId")
            if not session_id:
                raise RuntimeError(
                    "Target.attachToTarget did not return a sessionId"
                )

        # --- Step 2: dispatch the real method ---
        call_id = next_id
        req: Dict[str, Any] = {
            "id": call_id,
            "method": method,
            "params": params or {},
        }
        if session_id:
            req["sessionId"] = session_id
        await ws.send(json.dumps(req))

        reply = await _await_cdp_reply(
            ws,
            call_id,
            timeout,
            f"Timed out waiting for response to {method}",
        )
        if "error" in reply:
            raise RuntimeError(f"CDP error: {reply['error']}")
        return reply.get("result", {})


async def _await_cdp_reply(
    ws: Any,
    reply_id: int,
    timeout: float,
    timeout_message: str,
) -> Dict[str, Any]:
    """Read from ``ws`` until the message whose ``id`` equals ``reply_id`` arrives.

    Events (messages without an ``id``) and replies to other requests are
    discarded. Raises ``TimeoutError(timeout_message)`` once ``timeout``
    seconds have elapsed since this helper was entered.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            raise TimeoutError(timeout_message)
        raw = await asyncio.wait_for(ws.recv(), timeout=remaining)
        msg = json.loads(raw)
        # Only JSON objects can be replies; skip anything else defensively.
        if isinstance(msg, dict) and msg.get("id") == reply_id:
            return msg
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Public tool function
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def browser_cdp(
    method: str,
    params: Optional[Dict[str, Any]] = None,
    target_id: Optional[str] = None,
    timeout: float = 30.0,
    task_id: Optional[str] = None,
) -> str:
    """Forward one raw CDP command to the configured endpoint.

    Method documentation lives at ``CDP_DOCS_URL``.

    Args:
        method: Name of the CDP method to invoke, for example
            ``"Target.getTargets"``. Must be a non-empty string.
        params: Parameters for ``method``. ``None`` (or any other falsy
            value) is sent as ``{}``.
        target_id: Target/tab ID. When truthy, ``_cdp_call`` attaches to
            that target first and routes ``method`` through the resulting
            session; when omitted the call goes out at browser level.
        timeout: Seconds to wait. Falsy or unparsable values fall back to
            30, and the result is clamped to the range 1–300.
        task_id: Ignored. Present only so the signature lines up with the
            other browser tools.

    Returns:
        On success, a JSON string of the form
        ``{"success": True, "method": ..., "result": {...}}`` with an extra
        ``"target_id"`` key when one was supplied. On any failure, the value
        produced by ``tool_error`` describing the problem.
    """
    del task_id  # unused — stateless

    # --- argument and environment guards ---------------------------------
    if not (method and isinstance(method, str)):
        return tool_error(
            "'method' is required (e.g. 'Target.getTargets')",
            cdp_docs=CDP_DOCS_URL,
        )

    if not _WS_AVAILABLE:
        return tool_error(
            "The 'websockets' Python package is required but not installed. "
            "Install it with: pip install websockets"
        )

    ws_endpoint = _resolve_cdp_endpoint()
    if not ws_endpoint:
        return tool_error(
            "No CDP endpoint is available. Run '/browser connect' to attach "
            "to a running Chrome, or set 'browser.cdp_url' in config.yaml. "
            "The Camofox backend is REST-only and does not expose CDP.",
            cdp_docs=CDP_DOCS_URL,
        )

    if not ws_endpoint.startswith(("ws://", "wss://")):
        return tool_error(
            f"CDP endpoint is not a WebSocket URL: {ws_endpoint!r}. "
            "Expected ws://... or wss://... — the /browser connect "
            "resolver should have rewritten this. Check that Chrome is "
            "actually listening on the debug port."
        )

    cdp_params: Dict[str, Any] = params if params else {}
    if not isinstance(cdp_params, dict):
        return tool_error(
            f"'params' must be an object/dict, got {type(cdp_params).__name__}"
        )

    # --- timeout normalisation -------------------------------------------
    try:
        wait_seconds = float(timeout or 30.0)
    except (TypeError, ValueError):
        wait_seconds = 30.0
    wait_seconds = max(1.0, min(wait_seconds, 300.0))

    # --- the CDP round-trip ----------------------------------------------
    try:
        cdp_result = _run_async(
            _cdp_call(ws_endpoint, method, cdp_params, target_id, wait_seconds)
        )
    except asyncio.TimeoutError as exc:
        return tool_error(
            f"CDP call timed out after {wait_seconds}s: {exc}",
            method=method,
        )
    except (TimeoutError, RuntimeError) as exc:
        # Both carry a ready-made message from _cdp_call; pass it through.
        return tool_error(str(exc), method=method)
    except WebSocketException as exc:
        return tool_error(
            f"WebSocket error talking to CDP at {ws_endpoint}: {exc}. The "
            "browser may have disconnected — try '/browser connect' again.",
            method=method,
        )
    except Exception as exc:  # pragma: no cover — unexpected
        logger.exception("browser_cdp unexpected error")
        return tool_error(
            f"Unexpected error: {type(exc).__name__}: {exc}",
            method=method,
        )

    response: Dict[str, Any] = {
        "success": True,
        "method": method,
        "result": cdp_result,
    }
    if target_id:
        response["target_id"] = target_id
    return json.dumps(response, ensure_ascii=False)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Registry
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Tool schema advertised to the model for ``browser_cdp``.
BROWSER_CDP_SCHEMA: Dict[str, Any] = {
    "name": "browser_cdp",
    # Paragraphs are joined with blank lines; bullet lists with newlines.
    "description": "\n\n".join(
        [
            (
                "Send a raw Chrome DevTools Protocol (CDP) command. "
                "Escape hatch for browser operations not covered by "
                "browser_navigate, browser_click, browser_console, etc."
            ),
            (
                "**Requires a reachable CDP endpoint.** Available when the "
                "user has run '/browser connect' to attach to a running "
                "Chrome, or when 'browser.cdp_url' is set in config.yaml. "
                "Not currently wired up for cloud backends (Browserbase, "
                "Browser Use, Firecrawl) — those expose CDP per session but "
                "live-session routing is a follow-up. Camofox is REST-only "
                "and will never support CDP. If the tool is in your toolset "
                "at all, a CDP endpoint is already reachable."
            ),
            (
                f"**CDP method reference:** {CDP_DOCS_URL} — use "
                "web_extract on a method's URL (e.g. "
                "'/tot/Page/#method-handleJavaScriptDialog') to look up "
                "parameters and return shape."
            ),
            "\n".join(
                [
                    "**Common patterns:**",
                    "- List tabs: method='Target.getTargets', params={}",
                    (
                        "- Handle a native JS dialog: "
                        "method='Page.handleJavaScriptDialog', "
                        "params={'accept': true, 'promptText': ''}, "
                        "target_id=<tabId>"
                    ),
                    (
                        "- Get all cookies: method='Network.getAllCookies', "
                        "params={}"
                    ),
                    (
                        "- Eval in a specific tab: method='Runtime.evaluate', "
                        "params={'expression': '...', 'returnByValue': true}, "
                        "target_id=<tabId>"
                    ),
                    (
                        "- Set viewport for a tab: "
                        "method='Emulation.setDeviceMetricsOverride', "
                        "params={'width': 1280, 'height': 720, "
                        "'deviceScaleFactor': 1, 'mobile': false}, "
                        "target_id=<tabId>"
                    ),
                ]
            ),
            "\n".join(
                [
                    "**Usage rules:**",
                    (
                        "- Browser-level methods (Target.*, Browser.*, "
                        "Storage.*): omit target_id."
                    ),
                    (
                        "- Page-level methods (Page.*, Runtime.*, DOM.*, "
                        "Emulation.*, Network.* scoped to a tab): pass "
                        "target_id from Target.getTargets."
                    ),
                    (
                        "- Each call is independent — sessions and event "
                        "subscriptions do not persist between calls. For "
                        "stateful workflows, prefer the dedicated browser "
                        "tools."
                    ),
                ]
            ),
        ]
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "method": {
                "type": "string",
                "description": (
                    "CDP method name, e.g. 'Target.getTargets', "
                    "'Runtime.evaluate', 'Page.handleJavaScriptDialog'."
                ),
            },
            "params": {
                "type": "object",
                "description": (
                    "Method-specific parameters as a JSON object. "
                    "Omit or pass {} for methods that take no parameters."
                ),
                "additionalProperties": True,
            },
            "target_id": {
                "type": "string",
                "description": (
                    "Optional. Target/tab ID from Target.getTargets "
                    "result (each entry's 'targetId'). Required for "
                    "page-level methods; must be omitted for "
                    "browser-level methods."
                ),
            },
            "timeout": {
                "type": "number",
                "description": "Timeout in seconds (default 30, max 300).",
                "default": 30,
            },
        },
        "required": ["method"],
    },
}
|
|
|
|
|
|
def _browser_cdp_check() -> bool:
    """Report whether the CDP tools should be offered right now.

    Returns ``True`` only when ``check_browser_requirements()`` passes and
    ``_get_cdp_override()`` yields a truthy endpoint — i.e. a static URL set
    through ``/browser connect`` (``BROWSER_CDP_URL``) or ``browser.cdp_url``
    in ``config.yaml``. If ``tools.browser_tool`` cannot be imported the
    answer is ``False``.

    Backends that do *not* currently expose CDP to us — Camofox (REST-only),
    the default local agent-browser mode (Playwright hides its internal CDP
    port), and cloud providers whose per-session ``cdp_url`` is not yet
    surfaced — are thereby gated out, so the model never sees a tool that
    would reliably fail. Cloud-provider CDP routing is a follow-up.

    This lives in a small function so that the ``registry.register(...)``
    statements can remain at module top level, which is the only place the
    tool-discovery AST scan looks for them.
    """
    try:
        from tools.browser_tool import (  # type: ignore[import-not-found]
            _get_cdp_override,
            check_browser_requirements,
        )
    except ImportError as exc:  # pragma: no cover — defensive
        logger.debug("browser_cdp check: browser_tool import failed: %s", exc)
        available = False
    else:
        # Short-circuit: the override is only consulted once the general
        # browser requirements are satisfied.
        available = bool(check_browser_requirements()) and bool(
            _get_cdp_override()
        )
    return available
|
|
|
|
|
|
def _browser_cdp_handler(args, **kw):
    """Registry entry point: unpack tool-call arguments for ``browser_cdp``."""
    return browser_cdp(
        method=args.get("method", ""),
        params=args.get("params"),
        target_id=args.get("target_id"),
        timeout=args.get("timeout", 30.0),
        task_id=kw.get("task_id"),
    )


# Kept as a top-level statement: the tool-discovery AST scan only picks up
# top-level ``registry.register(...)`` calls.
registry.register(
    name="browser_cdp",
    toolset="browser",
    schema=BROWSER_CDP_SCHEMA,
    handler=_browser_cdp_handler,
    check_fn=_browser_cdp_check,
    emoji="🧪",
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# browser_dialog — ergonomic wrapper over Page.handleJavaScriptDialog
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def browser_dialog(
    action: str,
    prompt_text: Optional[str] = None,
    target_id: Optional[str] = None,
    timeout: float = 30.0,
    task_id: Optional[str] = None,
) -> str:
    """Accept or dismiss a native JS dialog blocking the page.

    Thin wrapper over the CDP ``Page.handleJavaScriptDialog`` verb that
    also auto-resolves ``target_id`` when exactly one page tab is open.
    Same CDP endpoint and availability gate as :func:`browser_cdp`.

    Args:
        action: ``"accept"`` or ``"dismiss"``.
        prompt_text: Text to enter when handling a ``prompt()`` dialog;
            ignored for alert/confirm/beforeunload. ``None`` is sent as
            an empty string.
        target_id: Target/tab ID from ``Target.getTargets``. Optional
            when exactly one page tab is open; required otherwise.
        timeout: Seconds to wait for each CDP round-trip (default 30,
            clamped to the range 1–300).
        task_id: Unused — accepted for uniformity with other browser tools.

    Returns:
        JSON string ``{"success": True, "action": ..., "target_id": ...,
        "result": ...}`` on success, or the ``tool_error`` payload on
        failure. CDP's ``"No dialog is showing"`` error is passed through
        verbatim so callers can use this as a probe for dialog presence.
        Failures during target auto-resolution are reported the same way
        rather than raised.
    """
    del task_id  # unused — stateless

    # --- input validation ------------------------------------------------
    if action not in ("accept", "dismiss"):
        return tool_error(
            f"'action' must be 'accept' or 'dismiss', got {action!r}"
        )

    # --- shared gate checks (match browser_cdp) --------------------------
    if not _WS_AVAILABLE:
        return tool_error(
            "The 'websockets' Python package is required but not installed. "
            "Install it with: pip install websockets"
        )

    endpoint = _resolve_cdp_endpoint()
    if not endpoint:
        return tool_error(
            "No CDP endpoint is available. Run '/browser connect' to attach "
            "to a running Chrome, or set 'browser.cdp_url' in config.yaml.",
            cdp_docs=CDP_DOCS_URL,
        )

    if not endpoint.startswith(("ws://", "wss://")):
        return tool_error(
            f"CDP endpoint is not a WebSocket URL: {endpoint!r}. "
            "Check that Chrome is actually listening on the debug port."
        )

    try:
        safe_timeout = float(timeout) if timeout else 30.0
    except (TypeError, ValueError):
        safe_timeout = 30.0
    safe_timeout = max(1.0, min(safe_timeout, 300.0))

    # --- auto-resolve target_id when not explicitly given ---------------
    resolved_target_id = target_id
    if not resolved_target_id:
        try:
            targets_result = _run_async(
                _cdp_call(
                    endpoint, "Target.getTargets", {}, None, safe_timeout
                )
            )
        except (asyncio.TimeoutError, TimeoutError) as exc:
            return tool_error(
                f"Timed out listing tabs while resolving target: {exc}"
            )
        except RuntimeError as exc:
            return tool_error(
                f"Failed to list tabs while resolving target: {exc}"
            )
        except WebSocketException as exc:
            return tool_error(
                f"WebSocket error while resolving target at {endpoint}: {exc}"
            )
        except Exception as exc:  # pragma: no cover — unexpected
            # Mirrors the catch-all on the dispatch step below: failures
            # that are not WebSocketException (such as an OSError from a
            # refused connection) must still come back as an error payload
            # instead of escaping the tool.
            logger.exception(
                "browser_dialog unexpected error while resolving target"
            )
            return tool_error(
                "Unexpected error while resolving target: "
                f"{type(exc).__name__}: {exc}",
                action=action,
            )

        page_targets = [
            t
            for t in targets_result.get("targetInfos", [])
            if t.get("type") == "page"
        ]
        if not page_targets:
            return tool_error(
                "No page tabs found — nothing to handle a dialog on."
            )
        if len(page_targets) > 1:
            # Include the candidates so the caller can retry with one.
            return tool_error(
                "Multiple page tabs are open — pass target_id explicitly. "
                "Use browser_cdp(method='Target.getTargets') to list them.",
                page_count=len(page_targets),
                tabs=[
                    {
                        "targetId": t.get("targetId"),
                        "title": t.get("title", ""),
                        "url": t.get("url", ""),
                    }
                    for t in page_targets
                ],
            )
        resolved_target_id = page_targets[0].get("targetId")
        if not resolved_target_id:
            return tool_error(
                "Target.getTargets returned a page target without a targetId"
            )

    # --- dispatch the dialog handler -------------------------------------
    cdp_params = {
        "accept": action == "accept",
        "promptText": prompt_text or "",
    }
    try:
        result = _run_async(
            _cdp_call(
                endpoint,
                "Page.handleJavaScriptDialog",
                cdp_params,
                resolved_target_id,
                safe_timeout,
            )
        )
    except (asyncio.TimeoutError, TimeoutError) as exc:
        return tool_error(
            f"CDP call timed out after {safe_timeout}s: {exc}",
            action=action,
            target_id=resolved_target_id,
        )
    except RuntimeError as exc:
        # CDP returns a clear "No dialog is showing" error when there's
        # nothing to handle — pass it through so callers can probe.
        return tool_error(
            str(exc), action=action, target_id=resolved_target_id
        )
    except WebSocketException as exc:
        return tool_error(
            f"WebSocket error talking to CDP at {endpoint}: {exc}. The "
            "browser may have disconnected — try '/browser connect' again.",
            action=action,
        )
    except Exception as exc:  # pragma: no cover — unexpected
        logger.exception("browser_dialog unexpected error")
        return tool_error(
            f"Unexpected error: {type(exc).__name__}: {exc}",
            action=action,
        )

    return json.dumps(
        {
            "success": True,
            "action": action,
            "target_id": resolved_target_id,
            "result": result,
        },
        ensure_ascii=False,
    )
|
|
|
|
|
|
# Tool schema advertised to the model for ``browser_dialog``. Every argument
# the registered handler forwards (action, prompt_text, target_id, timeout)
# is described here so the model can actually supply it.
BROWSER_DIALOG_SCHEMA: Dict[str, Any] = {
    "name": "browser_dialog",
    "description": (
        "Accept or dismiss a native JS dialog (alert/confirm/prompt/"
        "beforeunload) that's blocking a page.\n\n"
        "**When to use:** native dialogs freeze the page's JS thread, so "
        "browser_snapshot, browser_console, browser_click and similar tools "
        "will hang or error until the dialog is handled. Use this tool to "
        "unstick the page. Also safe as a probe — CDP returns a clean 'No "
        "dialog is showing' error when there isn't one, so you can call "
        "this to check whether a suspected dialog exists.\n\n"
        "**Requires the same CDP endpoint as browser_cdp.** If this tool "
        "is in your toolset, the endpoint is already reachable.\n\n"
        "**target_id auto-resolution:** when exactly one page tab is "
        "open, target_id can be omitted. With multiple page tabs, an "
        "explicit target_id is required — the error response lists the "
        "tabs so you can pick one."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": ["accept", "dismiss"],
                "description": (
                    "'accept' confirms OK/Yes/Submit; 'dismiss' cancels. "
                    "For beforeunload dialogs, 'accept' leaves the page "
                    "and 'dismiss' stays on it."
                ),
            },
            "prompt_text": {
                "type": "string",
                "description": (
                    "Text to enter when handling a prompt() dialog. "
                    "Ignored for alert, confirm, and beforeunload dialogs."
                ),
            },
            "target_id": {
                "type": "string",
                "description": (
                    "Target/tab ID from Target.getTargets. Optional when "
                    "exactly one page tab is open; required otherwise."
                ),
            },
            # browser_dialog() accepts ``timeout`` and the handler passes
            # it through, so expose it the same way browser_cdp does.
            "timeout": {
                "type": "number",
                "description": (
                    "Timeout in seconds for the CDP round-trip "
                    "(default 30, max 300)."
                ),
                "default": 30,
            },
        },
        "required": ["action"],
    },
}
|
|
|
|
|
|
def _browser_dialog_handler(args, **kw):
    """Registry entry point: unpack tool-call arguments for ``browser_dialog``."""
    return browser_dialog(
        action=args.get("action", ""),
        prompt_text=args.get("prompt_text"),
        target_id=args.get("target_id"),
        timeout=args.get("timeout", 30.0),
        task_id=kw.get("task_id"),
    )


# Registered at module top level with the same availability gate as
# browser_cdp, so both tools appear and disappear together.
registry.register(
    name="browser_dialog",
    toolset="browser",
    schema=BROWSER_DIALOG_SCHEMA,
    handler=_browser_dialog_handler,
    check_fn=_browser_cdp_check,
    emoji="💬",
)
|