From 4b8272f549c446d06395f6aec19a141b817b9810 Mon Sep 17 00:00:00 2001 From: Teknium Date: Sun, 19 Apr 2026 05:20:51 -0700 Subject: [PATCH] feat(browser): add browser_dialog for native JS dialog handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ergonomic wrapper over CDP's Page.handleJavaScriptDialog that accepts or dismisses alert/confirm/prompt/beforeunload dialogs blocking a page. Unsticks pages whose JS thread is frozen by an unhandled dialog — symptom is that browser_snapshot, browser_console, browser_click etc. start hanging or erroring. - action='accept'|'dismiss' required; prompt_text optional for prompt() - target_id auto-resolves when exactly one page tab is open; with multiple page tabs, errors with the tab list so the agent picks one - Shares browser_cdp's check_fn gate — only appears when CDP is reachable (/browser connect or browser.cdp_url in config). Hidden otherwise so backends that can't use it don't see it. - Safe as a probe: CDP returns a clean 'No dialog is showing' error when nothing's pending, which we pass through verbatim Dialog detection (knowing a dialog is open without being told) is NOT included — it requires persistent CDP subscriptions per session, a larger architectural change. Documented as a follow-up; agents infer from symptoms and use this tool to recover. Tests: 11 new unit tests against mock CDP server covering the wrapper (action validation, auto-resolve with 0/1/multiple page targets, explicit target_id accept/dismiss flow, prompt_text passthrough, shared gate with browser_cdp, registry dispatch). E2E probe case against real headless Chrome passes. Positive-case real-Chrome E2E is blocked by Chromium's headless auto-dismiss behavior when no persistent listener is attached — unit tests exercise the exact CDP protocol we send, so the handling path is protocol-verified; headful real-browser usage (the actual /browser connect case) keeps dialogs alive via the Chrome UI. 
--- tests/tools/test_browser_cdp_tool.py | 181 +++++++++++++ tools/browser_cdp_tool.py | 263 ++++++++++++++++++- toolsets.py | 8 +- website/docs/reference/tools-reference.md | 5 +- website/docs/reference/toolsets-reference.md | 2 +- website/docs/user-guide/features/browser.md | 26 ++ 6 files changed, 467 insertions(+), 18 deletions(-) diff --git a/tests/tools/test_browser_cdp_tool.py b/tests/tools/test_browser_cdp_tool.py index e7e187ceb..2fa19f231 100644 --- a/tests/tools/test_browser_cdp_tool.py +++ b/tests/tools/test_browser_cdp_tool.py @@ -406,3 +406,184 @@ def test_check_fn_false_when_browser_requirements_fail(monkeypatch): bt, "_get_cdp_override", lambda: "ws://localhost:9222/devtools/browser/x" ) assert browser_cdp_tool._browser_cdp_check() is False + + +# --------------------------------------------------------------------------- +# browser_dialog +# --------------------------------------------------------------------------- + + +def test_dialog_invalid_action_returns_error(): + result = json.loads(browser_cdp_tool.browser_dialog(action="yes")) + assert "error" in result + assert "accept" in result["error"] and "dismiss" in result["error"] + + +def test_dialog_no_endpoint_returns_error(monkeypatch): + monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "") + result = json.loads(browser_cdp_tool.browser_dialog(action="accept")) + assert "error" in result + assert "/browser connect" in result["error"] + + +def test_dialog_websockets_missing_returns_error(monkeypatch): + monkeypatch.setattr(browser_cdp_tool, "_WS_AVAILABLE", False) + result = json.loads(browser_cdp_tool.browser_dialog(action="accept")) + assert "error" in result + assert "websockets" in result["error"].lower() + + +def test_dialog_explicit_target_accept_flow(cdp_server): + """With explicit target_id, we skip Target.getTargets and attach+handle.""" + cdp_server.on( + "Target.attachToTarget", + lambda params, sid: {"sessionId": f"sess-{params['targetId']}"}, + ) + 
cdp_server.on("Page.handleJavaScriptDialog", lambda params, sid: {}) + + result = json.loads( + browser_cdp_tool.browser_dialog( + action="accept", target_id="tab-A", prompt_text="hello" + ) + ) + assert result["success"] is True + assert result["action"] == "accept" + assert result["target_id"] == "tab-A" + + calls = cdp_server.received() + # No Target.getTargets — we went straight to attach + handle + methods = [c["method"] for c in calls] + assert "Target.getTargets" not in methods + assert methods == ["Target.attachToTarget", "Page.handleJavaScriptDialog"] + handle = calls[1] + assert handle["params"] == {"accept": True, "promptText": "hello"} + assert handle["sessionId"] == "sess-tab-A" + + +def test_dialog_explicit_target_dismiss_flow(cdp_server): + cdp_server.on( + "Target.attachToTarget", + lambda params, sid: {"sessionId": f"sess-{params['targetId']}"}, + ) + cdp_server.on("Page.handleJavaScriptDialog", lambda params, sid: {}) + + result = json.loads( + browser_cdp_tool.browser_dialog(action="dismiss", target_id="tab-B") + ) + assert result["success"] is True + assert result["action"] == "dismiss" + handle = cdp_server.received()[1] + assert handle["params"] == {"accept": False, "promptText": ""} + + +def test_dialog_auto_resolve_single_page(cdp_server): + cdp_server.on( + "Target.getTargets", + lambda params, sid: { + "targetInfos": [ + {"targetId": "only-page", "type": "page", "title": "One", "url": "a"}, + {"targetId": "bg", "type": "background_page", "title": "Bg", "url": "b"}, + {"targetId": "sw", "type": "service_worker", "title": "SW", "url": "c"}, + ] + }, + ) + cdp_server.on( + "Target.attachToTarget", + lambda params, sid: {"sessionId": f"sess-{params['targetId']}"}, + ) + cdp_server.on("Page.handleJavaScriptDialog", lambda params, sid: {}) + + result = json.loads(browser_cdp_tool.browser_dialog(action="accept")) + assert result["success"] is True + assert result["target_id"] == "only-page" + + calls = cdp_server.received() + # Expect: 
Target.getTargets (browser-level), then attach, then handle + assert calls[0]["method"] == "Target.getTargets" + assert calls[1]["method"] == "Target.attachToTarget" + assert calls[1]["params"]["targetId"] == "only-page" + assert calls[2]["method"] == "Page.handleJavaScriptDialog" + + +def test_dialog_auto_resolve_no_pages(cdp_server): + cdp_server.on( + "Target.getTargets", + lambda params, sid: { + "targetInfos": [ + {"targetId": "bg", "type": "background_page", "title": "Bg", "url": "x"}, + ] + }, + ) + result = json.loads(browser_cdp_tool.browser_dialog(action="accept")) + assert "error" in result + assert "No page tabs" in result["error"] + + +def test_dialog_auto_resolve_multiple_pages_lists_tabs(cdp_server): + cdp_server.on( + "Target.getTargets", + lambda params, sid: { + "targetInfos": [ + {"targetId": "A", "type": "page", "title": "First", "url": "https://a.test"}, + {"targetId": "B", "type": "page", "title": "Second", "url": "https://b.test"}, + ] + }, + ) + result = json.loads(browser_cdp_tool.browser_dialog(action="accept")) + assert "error" in result + assert "target_id" in result["error"] + assert result.get("page_count") == 2 + tab_ids = {t["targetId"] for t in result.get("tabs", [])} + assert tab_ids == {"A", "B"} + + +def test_dialog_passes_through_no_dialog_showing(cdp_server): + """CDP's 'No dialog is showing' error should surface as a tool_error.""" + cdp_server.on( + "Target.attachToTarget", + lambda params, sid: {"sessionId": "sess"}, + ) + # No handler for Page.handleJavaScriptDialog -> mock returns CDP error + result = json.loads( + browser_cdp_tool.browser_dialog(action="dismiss", target_id="tab-X") + ) + assert "error" in result + assert result.get("action") == "dismiss" + assert result.get("target_id") == "tab-X" + + +def test_dialog_registered_in_browser_toolset_with_same_gate(): + """browser_dialog must use the same check_fn as browser_cdp so they + appear/disappear together.""" + from tools.registry import registry + + cdp_entry = 
registry.get_entry("browser_cdp") + dialog_entry = registry.get_entry("browser_dialog") + + assert dialog_entry is not None + assert dialog_entry.toolset == "browser" + assert dialog_entry.schema["name"] == "browser_dialog" + assert dialog_entry.schema["parameters"]["required"] == ["action"] + assert set(dialog_entry.schema["parameters"]["properties"]["action"]["enum"]) == { + "accept", + "dismiss", + } + # Shared gate + assert dialog_entry.check_fn is cdp_entry.check_fn + + +def test_dialog_dispatch_through_registry(cdp_server): + from tools.registry import registry + + cdp_server.on( + "Target.attachToTarget", lambda p, s: {"sessionId": "sess"} + ) + cdp_server.on("Page.handleJavaScriptDialog", lambda p, s: {}) + raw = registry.dispatch( + "browser_dialog", + {"action": "accept", "target_id": "tab-Z"}, + task_id="t1", + ) + result = json.loads(raw) + assert result["success"] is True + assert result["action"] == "accept" diff --git a/tools/browser_cdp_tool.py b/tools/browser_cdp_tool.py index 7817b9c35..0a4c24a58 100644 --- a/tools/browser_cdp_tool.py +++ b/tools/browser_cdp_tool.py @@ -1,19 +1,24 @@ #!/usr/bin/env python3 """ -Raw Chrome DevTools Protocol (CDP) passthrough tool. +Chrome DevTools Protocol (CDP) tools. -Exposes a single tool, ``browser_cdp``, that sends arbitrary CDP commands to -the browser's DevTools WebSocket endpoint. Works when a CDP URL is -configured — either via ``/browser connect`` (sets ``BROWSER_CDP_URL``) or -``browser.cdp_url`` in ``config.yaml`` — or when a CDP-backed cloud provider -session is active. +Exposes two tools that share the same CDP endpoint and availability gate: -This is the escape hatch for browser operations not covered by the main -browser tool surface (``browser_navigate``, ``browser_click``, -``browser_console``, etc.) — handling native dialogs, iframe-scoped -evaluation, cookie/network control, low-level tab management, etc. +* ``browser_cdp`` — raw CDP passthrough for arbitrary commands. 
Escape + hatch for anything not covered by the wrapped browser tools. +* ``browser_dialog`` — ergonomic wrapper over ``Page.handleJavaScriptDialog`` + that accepts/dismisses a native JS dialog (alert/confirm/prompt/ + beforeunload) blocking the page. Auto-resolves ``target_id`` when + exactly one page tab is open. -Method reference: https://chromedevtools.github.io/devtools-protocol/ +Both tools are only registered when a CDP endpoint is actually reachable +from Python at session start — meaning ``/browser connect`` is active or +``browser.cdp_url`` is set in ``config.yaml``. Backends that don't +currently expose CDP (Camofox, default local agent-browser, cloud +providers whose per-session ``cdp_url`` isn't surfaced) don't see these +tools at all. + +CDP method reference: https://chromedevtools.github.io/devtools-protocol/ """ from __future__ import annotations @@ -414,3 +419,239 @@ registry.register( check_fn=_browser_cdp_check, emoji="🧪", ) + + +# --------------------------------------------------------------------------- +# browser_dialog — ergonomic wrapper over Page.handleJavaScriptDialog +# --------------------------------------------------------------------------- + + +def browser_dialog( + action: str, + prompt_text: Optional[str] = None, + target_id: Optional[str] = None, + timeout: float = 30.0, + task_id: Optional[str] = None, +) -> str: + """Accept or dismiss a native JS dialog blocking the page. + + Thin wrapper over the CDP ``Page.handleJavaScriptDialog`` verb that + also auto-resolves ``target_id`` when exactly one page tab is open. + Same CDP endpoint and availability gate as :func:`browser_cdp`. + + Args: + action: ``"accept"`` or ``"dismiss"``. + prompt_text: Text to enter when handling a ``prompt()`` dialog; + ignored for alert/confirm/beforeunload. + target_id: Target/tab ID from ``Target.getTargets``. Optional + when exactly one page tab is open; required otherwise. + timeout: Seconds per CDP round-trip (default 30, clamped to 1-300). 
+ task_id: Unused — accepted for uniformity with other browser tools. + + Returns: + JSON string ``{"success": True, "action": ..., "target_id": ..., + "result": ...}`` on success, or ``{"error": "..."}`` on failure. CDP's + ``"No dialog is showing"`` error is passed through verbatim so + callers can use this as a probe for dialog presence. + """ + del task_id + + # --- input validation ------------------------------------------------ + if action not in ("accept", "dismiss"): + return tool_error( + f"'action' must be 'accept' or 'dismiss', got {action!r}" + ) + + # --- shared gate checks (match browser_cdp) -------------------------- + if not _WS_AVAILABLE: + return tool_error( + "The 'websockets' Python package is required but not installed. " + "Install it with: pip install websockets" + ) + + endpoint = _resolve_cdp_endpoint() + if not endpoint: + return tool_error( + "No CDP endpoint is available. Run '/browser connect' to attach " + "to a running Chrome, or set 'browser.cdp_url' in config.yaml.", + cdp_docs=CDP_DOCS_URL, + ) + + if not endpoint.startswith(("ws://", "wss://")): + return tool_error( + f"CDP endpoint is not a WebSocket URL: {endpoint!r}. " + "Check that Chrome is actually listening on the debug port." 
+ ) + + try: + safe_timeout = float(timeout) if timeout else 30.0 + except (TypeError, ValueError): + safe_timeout = 30.0 + safe_timeout = max(1.0, min(safe_timeout, 300.0)) + + # --- auto-resolve target_id when not explicitly given --------------- + resolved_target_id = target_id + if not resolved_target_id: + try: + targets_result = _run_async( + _cdp_call( + endpoint, "Target.getTargets", {}, None, safe_timeout + ) + ) + except (asyncio.TimeoutError, TimeoutError) as exc: + return tool_error( + f"Timed out listing tabs while resolving target: {exc}" + ) + except RuntimeError as exc: + return tool_error( + f"Failed to list tabs while resolving target: {exc}" + ) + except WebSocketException as exc: + return tool_error( + f"WebSocket error while resolving target at {endpoint}: {exc}" + ) + + page_targets = [ + t + for t in targets_result.get("targetInfos", []) + if t.get("type") == "page" + ] + if len(page_targets) == 0: + return tool_error( + "No page tabs found — nothing to handle a dialog on." + ) + if len(page_targets) > 1: + return tool_error( + "Multiple page tabs are open — pass target_id explicitly. 
" + "Use browser_cdp(method='Target.getTargets') to list them.", + page_count=len(page_targets), + tabs=[ + { + "targetId": t.get("targetId"), + "title": t.get("title", ""), + "url": t.get("url", ""), + } + for t in page_targets + ], + ) + resolved_target_id = page_targets[0].get("targetId") + if not resolved_target_id: + return tool_error( + "Target.getTargets returned a page target without a targetId" + ) + + # --- dispatch the dialog handler ------------------------------------- + cdp_params = { + "accept": action == "accept", + "promptText": prompt_text or "", + } + try: + result = _run_async( + _cdp_call( + endpoint, + "Page.handleJavaScriptDialog", + cdp_params, + resolved_target_id, + safe_timeout, + ) + ) + except (asyncio.TimeoutError, TimeoutError) as exc: + return tool_error( + f"CDP call timed out after {safe_timeout}s: {exc}", + action=action, + target_id=resolved_target_id, + ) + except RuntimeError as exc: + # CDP returns a clear "No dialog is showing" error when there's + # nothing to handle — pass it through so callers can probe. + return tool_error( + str(exc), action=action, target_id=resolved_target_id + ) + except WebSocketException as exc: + return tool_error( + f"WebSocket error talking to CDP at {endpoint}: {exc}. 
The " + "browser may have disconnected — try '/browser connect' again.", + action=action, target_id=resolved_target_id, + ) + except Exception as exc: # pragma: no cover — unexpected + logger.exception("browser_dialog unexpected error") + return tool_error( + f"Unexpected error: {type(exc).__name__}: {exc}", + action=action, target_id=resolved_target_id, + ) + + return json.dumps( + { + "success": True, + "action": action, + "target_id": resolved_target_id, + "result": result, + }, + ensure_ascii=False, + ) + + +BROWSER_DIALOG_SCHEMA: Dict[str, Any] = { + "name": "browser_dialog", + "description": ( + "Accept or dismiss a native JS dialog (alert/confirm/prompt/" + "beforeunload) that's blocking a page.\n\n" + "**When to use:** native dialogs freeze the page's JS thread, so " + "browser_snapshot, browser_console, browser_click and similar tools " + "will hang or error until the dialog is handled. Use this tool to " + "unstick the page. Also safe as a probe — CDP returns a clean 'No " + "dialog is showing' error when there isn't one, so you can call " + "this to check whether a suspected dialog exists.\n\n" + "**Requires the same CDP endpoint as browser_cdp.** If this tool " + "is in your toolset, the endpoint is already reachable.\n\n" + "**target_id auto-resolution:** when exactly one page tab is " + "open, target_id can be omitted. With multiple page tabs, an " + "explicit target_id is required — the error response lists the " + "tabs so you can pick one." + ), + "parameters": { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["accept", "dismiss"], + "description": ( + "'accept' confirms OK/Yes/Submit; 'dismiss' cancels. " + "For beforeunload dialogs, 'accept' leaves the page " + "and 'dismiss' stays on it." + ), + }, + "prompt_text": { + "type": "string", + "description": ( + "Text to enter when handling a prompt() dialog. " + "Ignored for alert, confirm, and beforeunload dialogs." 
+ ), + }, + "target_id": { + "type": "string", + "description": ( + "Target/tab ID from Target.getTargets. 
Optional when " + "exactly one page tab is open; required otherwise." + ), + }, + }, + "required": ["action"], + }, +} + + +registry.register( + name="browser_dialog", + toolset="browser", + schema=BROWSER_DIALOG_SCHEMA, + handler=lambda args, **kw: browser_dialog( + action=args.get("action", ""), + prompt_text=args.get("prompt_text"), + target_id=args.get("target_id"), + timeout=args.get("timeout", 30.0), + task_id=kw.get("task_id"), + ), + check_fn=_browser_cdp_check, + emoji="💬", +) diff --git a/toolsets.py b/toolsets.py index d9f353e1f..de103da5e 100644 --- a/toolsets.py +++ b/toolsets.py @@ -43,7 +43,7 @@ _HERMES_CORE_TOOLS = [ "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", "browser_cdp", + "browser_vision", "browser_console", "browser_cdp", "browser_dialog", # Text-to-speech "text_to_speech", # Planning & memory @@ -115,7 +115,7 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", "browser_cdp", "web_search" + "browser_vision", "browser_console", "browser_cdp", "browser_dialog", "web_search" ], "includes": [] }, @@ -249,7 +249,7 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", "browser_cdp", + "browser_vision", "browser_console", "browser_cdp", "browser_dialog", "todo", "memory", "session_search", "execute_code", "delegate_task", @@ -274,7 +274,7 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", "browser_cdp", + "browser_vision", "browser_console", "browser_cdp", "browser_dialog", 
# Planning & memory "todo", "memory", # Session history search diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md index c255c8f6a..f644e59ea 100644 --- a/website/docs/reference/tools-reference.md +++ b/website/docs/reference/tools-reference.md @@ -6,9 +6,9 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool # Built-in Tools Reference -This page documents all 53 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets. +This page documents all 54 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets. -**Quick counts:** 11 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets. +**Quick counts:** 12 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets. :::tip MCP Tools In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration. @@ -20,6 +20,7 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server |------|-------------|----------------------| | `browser_back` | Navigate back to the previous page in browser history. Requires browser_navigate to be called first. | — | | `browser_cdp` | Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for browser operations not covered by browser_navigate, browser_click, browser_console, etc. Only available when a CDP endpoint is reachable at session start — via `/browser connect` or `browser.cdp_url` config. 
See https://chromedevtools.github.io/devtools-protocol/ | — | +| `browser_dialog` | Accept or dismiss a native JS dialog (alert/confirm/prompt/beforeunload) that's blocking a page. Auto-resolves target_id when exactly one page tab is open. Same CDP gate as browser_cdp. Safe as a probe — returns 'No dialog is showing' when nothing's pending. | — | | `browser_click` | Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first. | — | | `browser_console` | Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requi… | — | | `browser_get_images` | Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first. | — | diff --git a/website/docs/reference/toolsets-reference.md b/website/docs/reference/toolsets-reference.md index bb911004e..518c49b06 100644 --- a/website/docs/reference/toolsets-reference.md +++ b/website/docs/reference/toolsets-reference.md @@ -52,7 +52,7 @@ Or in-session: | Toolset | Tools | Purpose | |---------|-------|---------| -| `browser` | `browser_back`, `browser_cdp`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. `browser_cdp` is a raw CDP passthrough gated on a reachable CDP endpoint — it only appears when `/browser connect` is active or `browser.cdp_url` is set. 
| +| `browser` | `browser_back`, `browser_cdp`, `browser_click`, `browser_console`, `browser_dialog`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. `browser_cdp` and `browser_dialog` share a gate on a reachable CDP endpoint — both only appear when `/browser connect` is active or `browser.cdp_url` is set. | | `clarify` | `clarify` | Ask the user a question when the agent needs clarification. | | `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. | | `cronjob` | `cronjob` | Schedule and manage recurring tasks. | diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md index d6624bf7d..29f14d817 100644 --- a/website/docs/user-guide/features/browser.md +++ b/website/docs/user-guide/features/browser.md @@ -357,6 +357,32 @@ browser_cdp(method="Network.getAllCookies") Browser-level methods (`Target.*`, `Browser.*`, `Storage.*`) omit `target_id`. Page-level methods (`Page.*`, `Runtime.*`, `DOM.*`, `Emulation.*`) require a `target_id` from `Target.getTargets`. Each call is independent — sessions do not persist between calls. +### `browser_dialog` + +Accept or dismiss a native JS dialog (`alert`, `confirm`, `prompt`, `beforeunload`) that's blocking a page. Native dialogs freeze the page's JS thread, so `browser_snapshot`, `browser_console`, `browser_click` and related tools will hang or error until the dialog is handled. + +**Same CDP gate as `browser_cdp`** — appears in the toolset when `/browser connect` is active or `browser.cdp_url` is set, and disappears otherwise. 
+ +``` +# Accept (click OK / Yes / Submit) +browser_dialog(action="accept") + +# Dismiss (click Cancel / No) +browser_dialog(action="dismiss") + +# Fill a prompt() dialog +browser_dialog(action="accept", prompt_text="my answer") + +# With multiple tabs open, specify which one +browser_dialog(action="accept", target_id="<targetId>") +``` + +`target_id` is auto-resolved when exactly one page tab is open. With multiple page tabs, the tool returns an error listing them so the agent can pick one explicitly. + +Safe as a probe: CDP cleanly returns `"No dialog is showing"` when nothing's pending, so calling `browser_dialog(action="dismiss")` is a low-risk way to check for a stuck dialog (if one is showing, it gets dismissed). If subsequent `browser_snapshot` / `browser_click` calls start hanging on a page that was working before, this is the first thing to try. + +**Note on dialog detection:** Hermes does not currently auto-detect that a dialog is open — the agent infers from symptoms (calls hanging/erroring) and uses `browser_dialog` to unstick the page. Persistent dialog-event subscription is a larger architectural change (persistent CDP connections per session) and is a follow-up. + ## Practical Examples ### Filling Out a Web Form