From d4b26df8974bca7114fa4fbff83e4600c31230f9 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 10 May 2026 07:37:55 -0700 Subject: [PATCH] perf(browser): route browser_console eval through supervisor's persistent CDP WS (180x faster) (#23226) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds CDPSupervisor.evaluate_runtime() and wires it into _browser_eval as a fast path when a supervisor is alive for the current task_id. Replaces the ~180ms agent-browser subprocess fork+exec+Node-startup hop with a ~1ms Runtime.evaluate over the supervisor's already-connected WebSocket. Falls through to the existing agent-browser CLI path when no supervisor is running (e.g. backends without CDP, or before the first browser_navigate attaches one), so behaviour is unchanged where it can't apply. JS-side exceptions surface directly without falling through to the subprocess (the subprocess would just re-raise the same error, slower); supervisor-side failures (loop down, no session) fall through cleanly. Benchmark — 30 iterations of `1 + 1` against headless Chrome: supervisor WS mean= 0.96ms median= 0.91ms agent-browser subprocess mean=179.35ms median=167.73ms → 187x speedup mean Tests: 14 unit tests (mocked supervisor + response-shape coverage), 5 real-Chrome e2e tests in test_browser_supervisor.py (gated on Chrome being installed). Browser test suite: 355 passed, 1 skipped. 
--- scripts/benchmark_browser_eval.py | 138 +++++++ .../test_browser_eval_supervisor_path.py | 363 ++++++++++++++++++ tests/tools/test_browser_supervisor.py | 77 ++++ tools/browser_supervisor.py | 83 ++++ tools/browser_tool.py | 47 +++ website/docs/user-guide/features/browser.md | 9 + 6 files changed, 717 insertions(+) create mode 100644 scripts/benchmark_browser_eval.py create mode 100644 tests/tools/test_browser_eval_supervisor_path.py diff --git a/scripts/benchmark_browser_eval.py b/scripts/benchmark_browser_eval.py new file mode 100644 index 00000000000..019667f2365 --- /dev/null +++ b/scripts/benchmark_browser_eval.py @@ -0,0 +1,138 @@ +"""Quick benchmark: subprocess eval vs supervisor-WS eval. + +Runs both paths against the same live Chrome and prints a comparison table. +Not a pytest — a script you run manually for the PR description. + +Usage: + .venv/bin/python scripts/benchmark_browser_eval.py [--iterations N] +""" +from __future__ import annotations + +import argparse +import shutil +import statistics +import subprocess +import sys +import tempfile +import time +import urllib.request +import json + + +def _find_chrome() -> str: + for c in ("google-chrome", "chromium", "chromium-browser"): + p = shutil.which(c) + if p: + return p + print("No Chrome binary found.", file=sys.stderr) + sys.exit(1) + + +def _start_chrome(port: int): + profile = tempfile.mkdtemp(prefix="hermes-bench-eval-") + proc = subprocess.Popen( + [ + _find_chrome(), + f"--remote-debugging-port={port}", + f"--user-data-dir={profile}", + "--no-first-run", + "--no-default-browser-check", + "--headless=new", + "--disable-gpu", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + deadline = time.monotonic() + 15 + while time.monotonic() < deadline: + try: + with urllib.request.urlopen(f"http://127.0.0.1:{port}/json/version", timeout=1) as r: + info = json.loads(r.read().decode()) + return proc, profile, info["webSocketDebuggerUrl"] + except Exception: + time.sleep(0.25) + 
proc.terminate() + raise RuntimeError("Chrome didn't expose CDP") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--iterations", type=int, default=50) + parser.add_argument("--port", type=int, default=9333) + args = parser.parse_args() + + proc, profile, cdp_url = _start_chrome(args.port) + try: + from tools.browser_supervisor import SUPERVISOR_REGISTRY + + # Warm up: start the supervisor, navigate to a page. + supervisor = SUPERVISOR_REGISTRY.get_or_start( + task_id="bench-eval", cdp_url=cdp_url + ) + # Give it a moment to attach. + time.sleep(1.0) + + # Sanity check: one eval over WS should succeed. + sanity = supervisor.evaluate_runtime("1 + 1") + if not sanity.get("ok") or sanity.get("result") != 2: + print(f"sanity check failed: {sanity}", file=sys.stderr) + sys.exit(2) + + # ── Bench 1: supervisor WS path ────────────────────────────────── + ws_times: list[float] = [] + for _ in range(args.iterations): + t0 = time.monotonic() + out = supervisor.evaluate_runtime("1 + 1") + t1 = time.monotonic() + assert out.get("ok"), out + ws_times.append((t1 - t0) * 1000) + + # ── Bench 2: agent-browser subprocess path ──────────────────────── + # Skip if agent-browser isn't installed — the WS bench still tells + # us what we need. 
+ if shutil.which("agent-browser") is None and shutil.which("npx") is None: + print("agent-browser CLI not found — skipping subprocess bench.") + sub_times = [] + else: + from tools.browser_tool import _run_browser_command, _last_session_key + task_id = _last_session_key("bench-eval") + sub_times = [] + for _ in range(args.iterations): + t0 = time.monotonic() + _run_browser_command(task_id, "eval", ["1 + 1"]) + t1 = time.monotonic() + sub_times.append((t1 - t0) * 1000) + + def fmt(name: str, ts: list[float]) -> str: + if not ts: + return f" {name:<40} (skipped)" + mean = statistics.mean(ts) + median = statistics.median(ts) + mn, mx = min(ts), max(ts) + return ( + f" {name:<40} mean={mean:>7.2f}ms median={median:>7.2f}ms " + f"min={mn:>7.2f}ms max={mx:>7.2f}ms" + ) + + print() + print(f"browser_eval benchmark — {args.iterations} iterations of `1 + 1`") + print("-" * 90) + print(fmt("supervisor WS (Runtime.evaluate)", ws_times)) + print(fmt("agent-browser subprocess (eval)", sub_times)) + if ws_times and sub_times: + speedup = statistics.mean(sub_times) / statistics.mean(ws_times) + print() + print(f"Speedup: {speedup:.1f}x (mean)") + + finally: + SUPERVISOR_REGISTRY.stop_all() + proc.terminate() + try: + proc.wait(timeout=3) + except Exception: + proc.kill() + shutil.rmtree(profile, ignore_errors=True) + + +if __name__ == "__main__": + main() diff --git a/tests/tools/test_browser_eval_supervisor_path.py b/tests/tools/test_browser_eval_supervisor_path.py new file mode 100644 index 00000000000..8528b099489 --- /dev/null +++ b/tests/tools/test_browser_eval_supervisor_path.py @@ -0,0 +1,363 @@ +"""Unit tests for the supervisor-WS fast path in browser_console / _browser_eval. + +These exercise the dispatch logic in ``tools.browser_tool._browser_eval`` and +the response shaping in ``CDPSupervisor.evaluate_runtime`` using mocks — no +real browser, no real WebSocket. Real-CDP coverage lives in +``tests/tools/test_browser_supervisor.py`` (gated on Chrome being installed). 
+""" +from __future__ import annotations + +import json +from unittest.mock import MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# Fast-path dispatch: tools.browser_tool._browser_eval +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _disable_camofox(monkeypatch): + """Force the non-camofox path so our supervisor branch is reached.""" + import tools.browser_tool as bt + + monkeypatch.setattr(bt, "_is_camofox_mode", lambda: False) + monkeypatch.setattr(bt, "_last_session_key", lambda task_id: "test-task") + + +def _patch_supervisor(monkeypatch, supervisor): + """Wire SUPERVISOR_REGISTRY.get to return ``supervisor`` for any task_id.""" + import tools.browser_supervisor as bs + + registry = MagicMock() + registry.get.return_value = supervisor + monkeypatch.setattr(bs, "SUPERVISOR_REGISTRY", registry) + return registry + + +class TestBrowserEvalSupervisorPath: + """The supervisor fast path replaces the agent-browser subprocess hop.""" + + def test_primitive_result_routes_through_supervisor(self, monkeypatch): + import tools.browser_tool as bt + + sup = MagicMock() + sup.evaluate_runtime.return_value = { + "ok": True, + "result": 42, + "result_type": "number", + } + _patch_supervisor(monkeypatch, sup) + # If the subprocess path is hit we want a loud failure. 
+ monkeypatch.setattr( + bt, "_run_browser_command", + lambda *a, **kw: pytest.fail("subprocess path must not run when supervisor is healthy"), + ) + + out = json.loads(bt._browser_eval("1 + 41")) + assert out["success"] is True + assert out["result"] == 42 + assert out["method"] == "cdp_supervisor" + sup.evaluate_runtime.assert_called_once_with("1 + 41") + + def test_json_string_result_is_parsed(self, monkeypatch): + """Match agent-browser semantics: JSON-string results get parsed.""" + import tools.browser_tool as bt + + sup = MagicMock() + sup.evaluate_runtime.return_value = { + "ok": True, + "result": '{"a": 1, "b": [2, 3]}', + "result_type": "string", + } + _patch_supervisor(monkeypatch, sup) + monkeypatch.setattr( + bt, "_run_browser_command", + lambda *a, **kw: pytest.fail("subprocess path must not run"), + ) + + out = json.loads(bt._browser_eval('JSON.stringify({a:1,b:[2,3]})')) + assert out["success"] is True + assert out["result"] == {"a": 1, "b": [2, 3]} + # result_type reflects the parsed Python type, not the raw JS type. 
+ assert out["result_type"] == "dict" + + def test_non_json_string_result_kept_as_string(self, monkeypatch): + import tools.browser_tool as bt + + sup = MagicMock() + sup.evaluate_runtime.return_value = { + "ok": True, + "result": "hello world", + "result_type": "string", + } + _patch_supervisor(monkeypatch, sup) + monkeypatch.setattr(bt, "_run_browser_command", lambda *a, **kw: pytest.fail("nope")) + + out = json.loads(bt._browser_eval('"hello world"')) + assert out["result"] == "hello world" + assert out["result_type"] == "str" + + def test_js_exception_surfaces_without_subprocess_fallthrough(self, monkeypatch): + """A JS-side error must NOT trigger a (slow + redundant) subprocess retry.""" + import tools.browser_tool as bt + + sup = MagicMock() + sup.evaluate_runtime.return_value = { + "ok": False, + "error": "Uncaught ReferenceError: foo is not defined", + } + _patch_supervisor(monkeypatch, sup) + called = {"subprocess": False} + + def _fake_subprocess(*a, **kw): + called["subprocess"] = True + return {"success": True, "data": {"result": "should-not-be-used"}} + + monkeypatch.setattr(bt, "_run_browser_command", _fake_subprocess) + + out = json.loads(bt._browser_eval("foo.bar")) + assert out["success"] is False + assert "ReferenceError" in out["error"] + assert called["subprocess"] is False, \ + "JS exception should be surfaced, not retried via subprocess" + + def test_supervisor_loop_down_falls_through_to_subprocess(self, monkeypatch): + """When the supervisor itself is unavailable, fall back to the subprocess.""" + import tools.browser_tool as bt + + sup = MagicMock() + sup.evaluate_runtime.return_value = { + "ok": False, + "error": "supervisor loop is not running", + } + _patch_supervisor(monkeypatch, sup) + + called = {"subprocess": False} + + def _fake_subprocess(task_id, cmd, args): + called["subprocess"] = True + assert cmd == "eval" + return {"success": True, "data": {"result": "fallback-result"}} + + monkeypatch.setattr(bt, "_run_browser_command", 
_fake_subprocess) + + out = json.loads(bt._browser_eval("anything")) + assert called["subprocess"] is True + assert out["success"] is True + assert out["result"] == "fallback-result" + # Subprocess path doesn't tag the response with method=cdp_supervisor. + assert out.get("method") != "cdp_supervisor" + + def test_no_active_supervisor_falls_through_to_subprocess(self, monkeypatch): + """When SUPERVISOR_REGISTRY.get returns None, subprocess path runs.""" + import tools.browser_tool as bt + + _patch_supervisor(monkeypatch, None) + called = {"subprocess": False} + + def _fake_subprocess(task_id, cmd, args): + called["subprocess"] = True + return {"success": True, "data": {"result": "agent-browser-result"}} + + monkeypatch.setattr(bt, "_run_browser_command", _fake_subprocess) + + out = json.loads(bt._browser_eval("1+1")) + assert called["subprocess"] is True + assert out["success"] is True + assert out.get("method") != "cdp_supervisor" + + def test_supervisor_no_session_falls_through(self, monkeypatch): + """A supervisor without an attached page session must fall through cleanly.""" + import tools.browser_tool as bt + + sup = MagicMock() + sup.evaluate_runtime.return_value = { + "ok": False, + "error": "supervisor has no attached page session", + } + _patch_supervisor(monkeypatch, sup) + called = {"subprocess": False} + + def _fake_subprocess(*a, **kw): + called["subprocess"] = True + return {"success": True, "data": {"result": "fallback"}} + + monkeypatch.setattr(bt, "_run_browser_command", _fake_subprocess) + json.loads(bt._browser_eval("1+1")) + assert called["subprocess"] is True + + +# --------------------------------------------------------------------------- +# Response shaping: CDPSupervisor.evaluate_runtime +# --------------------------------------------------------------------------- + + +def _make_supervisor_with_cdp(cdp_response): + """Build a CDPSupervisor instance that mocks ``_cdp`` to return ``cdp_response``. 
+ + Bypasses ``__init__`` entirely so we don't need a real WS connection. We + set just the state ``evaluate_runtime`` reads. + """ + import asyncio + import threading + + from tools.browser_supervisor import CDPSupervisor + + sup = object.__new__(CDPSupervisor) + sup._state_lock = threading.Lock() + sup._active = True + sup._page_session_id = "test-session-id" + + # Build a real running event loop on a background thread so + # asyncio.run_coroutine_threadsafe has somewhere to dispatch. + loop = asyncio.new_event_loop() + + def _runner(): + asyncio.set_event_loop(loop) + loop.run_forever() + + thread = threading.Thread(target=_runner, daemon=True) + thread.start() + + async def _fake_cdp(method, params=None, *, session_id=None, timeout=10.0): + return cdp_response + + sup._cdp = _fake_cdp # type: ignore[method-assign] + sup._loop = loop + sup._thread = thread + return sup + + +def _stop_supervisor(sup): + sup._loop.call_soon_threadsafe(sup._loop.stop) + sup._thread.join(timeout=2) + + +class TestEvaluateRuntimeResponseShaping: + """CDPSupervisor.evaluate_runtime decodes the Runtime.evaluate response correctly.""" + + def test_primitive_value(self): + sup = _make_supervisor_with_cdp({ + "id": 1, + "result": {"result": {"type": "number", "value": 42}}, + }) + try: + out = sup.evaluate_runtime("1 + 41") + assert out == {"ok": True, "result": 42, "result_type": "number"} + finally: + _stop_supervisor(sup) + + def test_object_value_returned_by_value(self): + sup = _make_supervisor_with_cdp({ + "id": 1, + "result": { + "result": { + "type": "object", + "value": {"foo": "bar", "n": 7}, + } + }, + }) + try: + out = sup.evaluate_runtime('({foo:"bar", n:7})') + assert out["ok"] is True + assert out["result"] == {"foo": "bar", "n": 7} + assert out["result_type"] == "object" + finally: + _stop_supervisor(sup) + + def test_undefined_value(self): + sup = _make_supervisor_with_cdp({ + "id": 1, + "result": {"result": {"type": "undefined"}}, + }) + try: + out = 
sup.evaluate_runtime("undefined") + assert out == {"ok": True, "result": None, "result_type": "undefined"} + finally: + _stop_supervisor(sup) + + def test_dom_node_returns_description(self): + """Non-serializable values (DOM nodes, functions) come back as description strings.""" + sup = _make_supervisor_with_cdp({ + "id": 1, + "result": { + "result": { + "type": "object", + "subtype": "node", + "description": "div#main.app", + # No 'value' key — returnByValue couldn't serialize it. + } + }, + }) + try: + out = sup.evaluate_runtime("document.querySelector('#main')") + assert out["ok"] is True + assert out["result"] == "div#main.app" + assert out["result_type"] == "object" + finally: + _stop_supervisor(sup) + + def test_js_exception_returns_error(self): + sup = _make_supervisor_with_cdp({ + "id": 1, + "result": { + "result": {"type": "undefined"}, + "exceptionDetails": { + "text": "Uncaught", + "exception": { + "description": "ReferenceError: foo is not defined", + }, + }, + }, + }) + try: + out = sup.evaluate_runtime("foo.bar") + assert out["ok"] is False + assert "ReferenceError" in out["error"] + finally: + _stop_supervisor(sup) + + def test_inactive_supervisor_returns_error_without_dispatch(self): + """Inactive supervisor short-circuits before even touching the loop.""" + import threading + from tools.browser_supervisor import CDPSupervisor + + sup = object.__new__(CDPSupervisor) + sup._state_lock = threading.Lock() + sup._active = False # ← key + sup._page_session_id = None + sup._loop = None + + out = sup.evaluate_runtime("1+1") + assert out["ok"] is False + # Either "loop is not running" or "is not active" is acceptable — + # both are caught by the supervisor-side error branch in _browser_eval. 
+ assert "supervisor" in out["error"].lower() + + def test_no_session_attached_returns_error(self): + import asyncio + import threading + from tools.browser_supervisor import CDPSupervisor + + sup = object.__new__(CDPSupervisor) + sup._state_lock = threading.Lock() + sup._active = True + sup._page_session_id = None # ← attach hasn't happened yet + + loop = asyncio.new_event_loop() + thread = threading.Thread( + target=lambda: (asyncio.set_event_loop(loop), loop.run_forever()), + daemon=True, + ) + thread.start() + sup._loop = loop + try: + out = sup.evaluate_runtime("1+1") + assert out["ok"] is False + assert "session" in out["error"].lower() + finally: + loop.call_soon_threadsafe(loop.stop) + thread.join(timeout=2) diff --git a/tests/tools/test_browser_supervisor.py b/tests/tools/test_browser_supervisor.py index e332aec43f9..360fec53a04 100644 --- a/tests/tools/test_browser_supervisor.py +++ b/tests/tools/test_browser_supervisor.py @@ -561,3 +561,80 @@ def test_bridge_captures_prompt_and_returns_reply_text(chrome_cdp, supervisor_re value = asyncio.run(nav_and_read()) assert value == "AGENT-SUPPLIED-REPLY", f"expected AGENT-SUPPLIED-REPLY, got {value!r}" + + +def test_evaluate_runtime_primitive(chrome_cdp, supervisor_registry): + """evaluate_runtime returns primitive values via the supervisor's live WS.""" + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-eval-1", cdp_url=cdp_url) + + # Need a page to evaluate against. 
+ _fire_on_page(cdp_url, "void 0") + time.sleep(0.5) + + out = supervisor.evaluate_runtime("1 + 41") + assert out["ok"] is True + assert out["result"] == 42 + assert out["result_type"] == "number" + + +def test_evaluate_runtime_object(chrome_cdp, supervisor_registry): + """Plain objects come back JSON-serialized via returnByValue=True.""" + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-eval-2", cdp_url=cdp_url) + + _fire_on_page(cdp_url, "void 0") + time.sleep(0.5) + + out = supervisor.evaluate_runtime('({foo: "bar", n: 7})') + assert out["ok"] is True + assert out["result"] == {"foo": "bar", "n": 7} + assert out["result_type"] == "object" + + +def test_evaluate_runtime_js_exception(chrome_cdp, supervisor_registry): + """JS exceptions surface as ok=False with the exception message.""" + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-eval-3", cdp_url=cdp_url) + + _fire_on_page(cdp_url, "void 0") + time.sleep(0.5) + + out = supervisor.evaluate_runtime("nonExistentVar.nope") + assert out["ok"] is False + assert "ReferenceError" in out["error"] or "not defined" in out["error"] + + +def test_evaluate_runtime_dom_node_returns_empty_object(chrome_cdp, supervisor_registry): + """DOM nodes with returnByValue=true serialize to ``{}`` (Chrome quirk). + + This is honest — DOM nodes can't be deeply JSON-serialized — and matches + DevTools console behaviour for the same expression. Documenting the + contract here so a future change that "fixes" it (e.g. switching to + returnByValue=false + DOM.describeNode) doesn't break callers expecting + the current shape. 
+ """ + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-eval-4", cdp_url=cdp_url) + + _fire_on_page(cdp_url, "void 0") + time.sleep(0.5) + + out = supervisor.evaluate_runtime("document.querySelector('h1')") + assert out["ok"] is True + assert out["result_type"] == "object" + # Empty dict — Chrome can't deeply-serialize a DOM node through returnByValue. + assert out["result"] == {} + + +def test_evaluate_runtime_unserializable_value(chrome_cdp, supervisor_registry): + """``Infinity``/``NaN``/``BigInt`` come back via ``unserializableValue``.""" + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-eval-5", cdp_url=cdp_url) + + _fire_on_page(cdp_url, "void 0") + time.sleep(0.5) + + out = supervisor.evaluate_runtime("Infinity") + assert out["ok"] is True + assert out["result"] == "Infinity" diff --git a/tools/browser_supervisor.py b/tools/browser_supervisor.py index db0b1e29909..371210350ff 100644 --- a/tools/browser_supervisor.py +++ b/tools/browser_supervisor.py @@ -457,6 +457,89 @@ class CDPSupervisor: return {"ok": False, "error": f"{type(e).__name__}: {e}"} return {"ok": True, "dialog": snapshot_copy.to_dict()} + def evaluate_runtime( + self, + expression: str, + *, + return_by_value: bool = True, + await_promise: bool = True, + timeout: float = 10.0, + ) -> Dict[str, Any]: + """Evaluate ``expression`` in the page's Runtime context over the live WS. + + Reuses the supervisor's already-connected WebSocket — zero subprocess + startup cost vs the agent-browser CLI ``eval`` command (which does + fork+exec+Node-startup+CDP-setup on every call). + + Returns a dict shaped like ``{"ok": True, "result": , "result_type": "..."}`` + on success, or ``{"ok": False, "error": "..."}`` on failure. + + ``return_by_value=True`` asks the browser to JSON-serialize the result + before sending it back, matching DevTools-console semantics for + primitive / plain-object expressions. 
For DOM nodes or non-serializable + objects, the browser's description string is returned in ``result``. + """ + loop = self._loop + if loop is None or not loop.is_running(): + return {"ok": False, "error": "supervisor loop is not running"} + + with self._state_lock: + if not self._active: + return {"ok": False, "error": "supervisor is not active"} + session_id = self._page_session_id + + if not session_id: + return {"ok": False, "error": "supervisor has no attached page session"} + + async def _do_eval() -> Dict[str, Any]: + return await self._cdp( + "Runtime.evaluate", + { + "expression": expression, + "returnByValue": return_by_value, + "awaitPromise": await_promise, + # userGesture matters for things like clipboard / fullscreen + # APIs that require a user-activation context. + "userGesture": True, + }, + session_id=session_id, + timeout=timeout, + ) + + try: + fut = asyncio.run_coroutine_threadsafe(_do_eval(), loop) + response = fut.result(timeout=timeout + 1) + except Exception as exc: + return {"ok": False, "error": f"{type(exc).__name__}: {exc}"} + + # Runtime.evaluate response shape: + # {"id": N, "result": {"result": {"type": "...", "value": ..., ...}, + # "exceptionDetails": {...} (only on error)}} + result_payload = response.get("result", {}) if isinstance(response, dict) else {} + exception_details = result_payload.get("exceptionDetails") + if exception_details: + # Surface the JS-side exception with a clean message. + exc_text = exception_details.get("text") or "JavaScript exception" + exc_obj = exception_details.get("exception") or {} + description = exc_obj.get("description") + if description: + exc_text = f"{exc_text}: {description}" + return {"ok": False, "error": exc_text} + + result_obj = result_payload.get("result", {}) + result_type = result_obj.get("type", "undefined") + + if "value" in result_obj: + value = result_obj["value"] + elif result_type == "undefined": + value = None + else: + # Non-serializable (functions, DOM nodes, etc.) 
— return the + # browser's string description so the model gets *something*. + value = result_obj.get("description") or result_obj.get("unserializableValue") + + return {"ok": True, "result": value, "result_type": result_type} + # ── Supervisor loop internals ──────────────────────────────────────────── def _thread_main(self) -> None: diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 084c4d3d31e..b1986f7b64b 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -2671,6 +2671,53 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str: return _camofox_eval(expression, task_id) effective_task_id = _last_session_key(task_id or "default") + + # --- Fast path: route through the supervisor's persistent CDP WS --------- + # When a CDPSupervisor is alive for this task_id, ``Runtime.evaluate`` runs + # on the already-connected WebSocket — zero subprocess startup cost vs + # spawning an ``agent-browser eval`` CLI process. Falls through to the + # subprocess path on any error so behaviour is unchanged when no + # supervisor is running (e.g. plain agent-browser without a CDP backend). + try: + from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found] + supervisor = SUPERVISOR_REGISTRY.get(effective_task_id) + if supervisor is not None: + sup_result = supervisor.evaluate_runtime(expression) + if sup_result.get("ok"): + raw_result = sup_result.get("result") + # Match the agent-browser path: if the value is a JSON string, + # parse it so the model gets structured data. 
+ parsed = raw_result + if isinstance(raw_result, str): + try: + parsed = json.loads(raw_result) + except (json.JSONDecodeError, ValueError): + pass # keep as string + response = { + "success": True, + "result": parsed, + "result_type": type(parsed).__name__, + "method": "cdp_supervisor", + } + return json.dumps(response, ensure_ascii=False, default=str) + # JS exception is a real failure — surface it instead of falling + # through to the subprocess path (which would just re-run and + # produce the same exception, but slower). + err = sup_result.get("error") or "evaluate_runtime failed" + if "supervisor" not in err.lower(): + # Real JS-side error — return it. + return json.dumps({"success": False, "error": err}, ensure_ascii=False) + # Supervisor-side failure (loop down, no session) — fall through. + logger.debug( + "browser_eval: supervisor path unavailable (%s), falling back to subprocess", + err, + ) + except ImportError: + pass + except Exception as exc: # pragma: no cover — defensive + logger.debug("browser_eval: supervisor path errored (%s), falling back", exc) + + # --- Fallback: agent-browser CLI subprocess (original path) ------------- result = _run_browser_command(effective_task_id, "eval", [expression]) if not result.get("success"): diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md index c078ed49769..2ae5e2b5aa4 100644 --- a/website/docs/user-guide/features/browser.md +++ b/website/docs/user-guide/features/browser.md @@ -423,6 +423,15 @@ Check the browser console for any JavaScript errors Use `clear=True` to clear the console after reading, so subsequent calls only show new messages. +`browser_console` also evaluates JavaScript when called with an `expression` argument — same shape as DevTools console, the result comes back parsed (JSON-serialized objects become dicts; primitive values stay primitive). 
+ +``` +browser_console(expression="document.querySelector('h1').textContent") +browser_console(expression="JSON.stringify(performance.timing)") +``` + +When a CDP supervisor is active for the current session (typical for any session that's run `browser_navigate` against a CDP-capable backend), evaluation runs over the supervisor's persistent WebSocket — no subprocess startup cost. Otherwise it falls back to the standard agent-browser CLI path. The evaluated result is the same either way; fast-path responses are tagged with `method: "cdp_supervisor"` and return with lower latency. + ### `browser_cdp` Raw Chrome DevTools Protocol passthrough — the escape hatch for browser operations not covered by the other tools. Use for native dialog handling, iframe-scoped evaluation, cookie/network control, or any CDP verb the agent needs.