From d4b26df8974bca7114fa4fbff83e4600c31230f9 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 10 May 2026 07:37:55 -0700
Subject: [PATCH] perf(browser): route browser_console eval through
 supervisor's persistent CDP WS (180x faster) (#23226)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds CDPSupervisor.evaluate_runtime() and wires it into _browser_eval as a
fast path when a supervisor is alive for the current task_id. Replaces the
~180ms agent-browser subprocess fork+exec+Node-startup hop with a ~1ms
Runtime.evaluate over the supervisor's already-connected WebSocket.

Falls through to the existing agent-browser CLI path when no supervisor is
running (e.g. backends without CDP, or before the first browser_navigate
attaches one), so behaviour is unchanged where it can't apply.

JS-side exceptions surface directly without falling through to the
subprocess (the subprocess would just re-raise the same error, slower);
supervisor-side failures (loop down, no session) fall through cleanly.

Benchmark — 30 iterations of `1 + 1` against headless Chrome:
  supervisor WS              mean=  0.96ms  median=  0.91ms
  agent-browser subprocess   mean=179.35ms  median=167.73ms
  → 187x speedup mean

Tests: 14 unit tests (mocked supervisor + response-shape coverage), 5
real-Chrome e2e tests in test_browser_supervisor.py (gated on Chrome
being installed). Browser test suite: 355 passed, 1 skipped.
---
 scripts/benchmark_browser_eval.py             | 138 +++++++
 .../test_browser_eval_supervisor_path.py      | 363 ++++++++++++++++++
 tests/tools/test_browser_supervisor.py        |  77 ++++
 tools/browser_supervisor.py                   |  83 ++++
 tools/browser_tool.py                         |  47 +++
 website/docs/user-guide/features/browser.md   |   9 +
 6 files changed, 717 insertions(+)
 create mode 100644 scripts/benchmark_browser_eval.py
 create mode 100644 tests/tools/test_browser_eval_supervisor_path.py

diff --git a/scripts/benchmark_browser_eval.py b/scripts/benchmark_browser_eval.py
new file mode 100644
index 00000000000..019667f2365
--- /dev/null
+++ b/scripts/benchmark_browser_eval.py
@@ -0,0 +1,138 @@
+"""Quick benchmark: subprocess eval vs supervisor-WS eval.
+
+Runs both paths against the same live Chrome and prints a comparison table.
+Not a pytest — a script you run manually for the PR description.
+
+Usage:
+    .venv/bin/python scripts/benchmark_browser_eval.py [--iterations N]
+"""
+from __future__ import annotations
+
+import argparse
+import shutil
+import statistics
+import subprocess
+import sys
+import tempfile
+import time
+import urllib.request
+import json
+
+
+def _find_chrome() -> str:
+    """Return the first Chrome/Chromium binary found on PATH, or exit with status 1."""
+    names = ("google-chrome", "chromium", "chromium-browser")
+    if found := next(filter(None, map(shutil.which, names)), None):
+        return found
+    print("No Chrome binary found.", file=sys.stderr)
+    sys.exit(1)
+
+
+def _start_chrome(port: int):
+    """Launch headless Chrome on ``port``; return ``(proc, profile_dir, ws_url)``."""
+    chrome = _find_chrome()  # resolve first so a missing binary leaks no profile
+    profile = tempfile.mkdtemp(prefix="hermes-bench-eval-")
+    flags = [
+        f"--remote-debugging-port={port}",
+        f"--user-data-dir={profile}",
+        "--no-first-run",
+        "--no-default-browser-check",
+        "--headless=new",
+        "--disable-gpu",
+    ]
+    proc = subprocess.Popen([chrome, *flags], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    deadline = time.monotonic() + 15
+    while time.monotonic() < deadline:
+        try:
+            with urllib.request.urlopen(f"http://127.0.0.1:{port}/json/version", timeout=1) as r:
+                return proc, profile, json.loads(r.read().decode())["webSocketDebuggerUrl"]
+        except Exception:
+            # Not listening yet (or a partial reply) — poll again shortly.
+            time.sleep(0.25)
+    proc.kill()  # CDP never came up: reap Chrome and drop its temp profile
+    proc.wait()
+    shutil.rmtree(profile, ignore_errors=True)
+    raise RuntimeError("Chrome didn't expose CDP")
+
+
+def main():
+    """Time ``1 + 1`` over the supervisor WS and the CLI subprocess, then print both."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--iterations", type=int, default=50)
+    parser.add_argument("--port", type=int, default=9333)
+    args = parser.parse_args()
+
+    # Import before launching Chrome: ``finally`` below depends on this name.
+    from tools.browser_supervisor import SUPERVISOR_REGISTRY
+    proc, profile, cdp_url = _start_chrome(args.port)
+    try:
+        # Warm up: start the supervisor against the freshly launched Chrome.
+        supervisor = SUPERVISOR_REGISTRY.get_or_start(
+            task_id="bench-eval", cdp_url=cdp_url
+        )
+        # Give it a moment to attach.
+        time.sleep(1.0)
+
+        # Sanity check: one eval over WS should succeed.
+        sanity = supervisor.evaluate_runtime("1 + 1")
+        if not sanity.get("ok") or sanity.get("result") != 2:
+            print(f"sanity check failed: {sanity}", file=sys.stderr)
+            sys.exit(2)
+
+        # ── Bench 1: supervisor WS path ──────────────────────────────────
+        ws_times: list[float] = []
+        for _ in range(args.iterations):
+            t0 = time.monotonic()
+            out = supervisor.evaluate_runtime("1 + 1")
+            t1 = time.monotonic()
+            assert out.get("ok"), out
+            ws_times.append((t1 - t0) * 1000)
+
+        # ── Bench 2: agent-browser subprocess path ────────────────────────
+        # Skip if agent-browser isn't installed — the WS bench still tells
+        # us what we need.
+        sub_times: list[float] = []
+        if shutil.which("agent-browser") is None and shutil.which("npx") is None:
+            print("agent-browser CLI not found — skipping subprocess bench.")
+        else:
+            from tools.browser_tool import _run_browser_command, _last_session_key
+            task_id = _last_session_key("bench-eval")
+            for _ in range(args.iterations):
+                t0 = time.monotonic()
+                res = _run_browser_command(task_id, "eval", ["1 + 1"])
+                t1 = time.monotonic()
+                assert res.get("success"), res  # never time a failed call
+                sub_times.append((t1 - t0) * 1000)
+
+        def fmt(name: str, ts: list[float]) -> str:
+            if not ts:
+                return f"  {name:<40} (skipped)"
+            mean = statistics.mean(ts)
+            median = statistics.median(ts)
+            mn, mx = min(ts), max(ts)
+            return (
+                f"  {name:<40} mean={mean:>7.2f}ms  median={median:>7.2f}ms  "
+                f"min={mn:>7.2f}ms  max={mx:>7.2f}ms"
+            )
+
+        print()
+        print(f"browser_eval benchmark — {args.iterations} iterations of `1 + 1`")
+        print("-" * 90)
+        print(fmt("supervisor WS (Runtime.evaluate)", ws_times))
+        print(fmt("agent-browser subprocess (eval)", sub_times))
+        if ws_times and sub_times:
+            speedup = statistics.mean(sub_times) / statistics.mean(ws_times)
+            print(f"\nSpeedup: {speedup:.1f}x (mean)")
+
+    finally:
+        SUPERVISOR_REGISTRY.stop_all()
+        proc.terminate()
+        try:
+            proc.wait(timeout=3)
+        except Exception:
+            proc.kill()
+        shutil.rmtree(profile, ignore_errors=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/tools/test_browser_eval_supervisor_path.py b/tests/tools/test_browser_eval_supervisor_path.py
new file mode 100644
index 00000000000..8528b099489
--- /dev/null
+++ b/tests/tools/test_browser_eval_supervisor_path.py
@@ -0,0 +1,363 @@
+"""Unit tests for the supervisor-WS fast path in browser_console / _browser_eval.
+
+These exercise the dispatch logic in ``tools.browser_tool._browser_eval`` and
+the response shaping in ``CDPSupervisor.evaluate_runtime`` using mocks — no
+real browser, no real WebSocket.  Real-CDP coverage lives in
+``tests/tools/test_browser_supervisor.py`` (gated on Chrome being installed).
+"""
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Fast-path dispatch: tools.browser_tool._browser_eval
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _disable_camofox(monkeypatch):
+    """Pin the non-camofox branch and a fixed session key for every test here."""
+    import tools.browser_tool as browser_tool
+
+    monkeypatch.setattr(browser_tool, "_last_session_key", lambda task_id: "test-task")
+    monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: False)
+
+
+def _patch_supervisor(monkeypatch, supervisor):
+    """Swap SUPERVISOR_REGISTRY for a mock whose ``get`` always yields ``supervisor``."""
+    import tools.browser_supervisor as supervisor_module
+
+    fake_registry = MagicMock()
+    fake_registry.get.return_value = supervisor
+    monkeypatch.setattr(supervisor_module, "SUPERVISOR_REGISTRY", fake_registry)
+    return fake_registry
+
+
+class TestBrowserEvalSupervisorPath:
+    """The supervisor fast path replaces the agent-browser subprocess hop."""
+
+    def test_primitive_result_routes_through_supervisor(self, monkeypatch) -> None:
+        import tools.browser_tool as bt
+
+        sup = MagicMock()
+        sup.evaluate_runtime.return_value = {
+            "ok": True,
+            "result": 42,
+            "result_type": "number",
+        }
+        _patch_supervisor(monkeypatch, sup)
+        # Make any use of the subprocess path fail the test immediately.
+        monkeypatch.setattr(
+            bt, "_run_browser_command",
+            lambda *a, **kw: pytest.fail("subprocess path must not run when supervisor is healthy"),
+        )
+
+        out = json.loads(bt._browser_eval("1 + 41"))
+        assert out["success"] is True
+        assert out["result"] == 42
+        assert out["method"] == "cdp_supervisor"
+        sup.evaluate_runtime.assert_called_once_with("1 + 41")
+
+    def test_json_string_result_is_parsed(self, monkeypatch) -> None:
+        """Match agent-browser semantics: JSON-string results get parsed."""
+        import tools.browser_tool as bt
+
+        sup = MagicMock()
+        sup.evaluate_runtime.return_value = {
+            "ok": True,
+            "result": '{"a": 1, "b": [2, 3]}',
+            "result_type": "string",
+        }
+        _patch_supervisor(monkeypatch, sup)
+        monkeypatch.setattr(
+            bt, "_run_browser_command",
+            lambda *a, **kw: pytest.fail("subprocess path must not run"),
+        )
+
+        out = json.loads(bt._browser_eval('JSON.stringify({a:1,b:[2,3]})'))
+        assert out["success"] is True
+        assert out["result"] == {"a": 1, "b": [2, 3]}
+        # result_type reflects the parsed Python type, not the raw JS type.
+        assert out["result_type"] == "dict"
+
+    def test_non_json_string_result_kept_as_string(self, monkeypatch) -> None:
+        import tools.browser_tool as bt
+
+        sup = MagicMock()
+        sup.evaluate_runtime.return_value = {
+            "ok": True,
+            "result": "hello world",
+            "result_type": "string",
+        }
+        _patch_supervisor(monkeypatch, sup)
+        monkeypatch.setattr(bt, "_run_browser_command", lambda *a, **kw: pytest.fail("nope"))
+
+        out = json.loads(bt._browser_eval('"hello world"'))
+        assert out["result"] == "hello world"
+        assert out["result_type"] == "str"
+
+    def test_js_exception_surfaces_without_subprocess_fallthrough(self, monkeypatch) -> None:
+        """A JS-side error must NOT trigger a (slow + redundant) subprocess retry."""
+        import tools.browser_tool as bt
+
+        sup = MagicMock()
+        sup.evaluate_runtime.return_value = {
+            "ok": False,
+            "error": "Uncaught ReferenceError: foo is not defined",
+        }
+        _patch_supervisor(monkeypatch, sup)
+        called = {"subprocess": False}
+
+        def _fake_subprocess(*a, **kw):
+            called["subprocess"] = True
+            return {"success": True, "data": {"result": "should-not-be-used"}}
+
+        monkeypatch.setattr(bt, "_run_browser_command", _fake_subprocess)
+
+        out = json.loads(bt._browser_eval("foo.bar"))
+        assert out["success"] is False
+        assert "ReferenceError" in out["error"]
+        assert called["subprocess"] is False, \
+            "JS exception should be surfaced, not retried via subprocess"
+
+    def test_supervisor_loop_down_falls_through_to_subprocess(self, monkeypatch) -> None:
+        """When the supervisor itself is unavailable, fall back to the subprocess."""
+        import tools.browser_tool as bt
+
+        sup = MagicMock()
+        sup.evaluate_runtime.return_value = {
+            "ok": False,
+            "error": "supervisor loop is not running",
+        }
+        _patch_supervisor(monkeypatch, sup)
+
+        called = {"subprocess": False}
+
+        def _fake_subprocess(task_id, cmd, args):
+            called["subprocess"] = True
+            assert cmd == "eval"
+            return {"success": True, "data": {"result": "fallback-result"}}
+
+        monkeypatch.setattr(bt, "_run_browser_command", _fake_subprocess)
+
+        out = json.loads(bt._browser_eval("anything"))
+        assert called["subprocess"] is True
+        assert out["success"] is True
+        assert out["result"] == "fallback-result"
+        # Subprocess path doesn't tag the response with method=cdp_supervisor.
+        assert out.get("method") != "cdp_supervisor"
+
+    def test_no_active_supervisor_falls_through_to_subprocess(self, monkeypatch) -> None:
+        """When SUPERVISOR_REGISTRY.get returns None, subprocess path runs."""
+        import tools.browser_tool as bt
+
+        _patch_supervisor(monkeypatch, None)
+        called = {"subprocess": False}
+
+        def _fake_subprocess(task_id, cmd, args):
+            called["subprocess"] = True
+            return {"success": True, "data": {"result": "agent-browser-result"}}
+
+        monkeypatch.setattr(bt, "_run_browser_command", _fake_subprocess)
+
+        out = json.loads(bt._browser_eval("1+1"))
+        assert called["subprocess"] is True
+        assert out["success"] is True
+        assert out.get("method") != "cdp_supervisor"
+
+    def test_supervisor_no_session_falls_through(self, monkeypatch) -> None:
+        """A supervisor without an attached page session must fall through cleanly."""
+        import tools.browser_tool as bt
+
+        sup = MagicMock()
+        sup.evaluate_runtime.return_value = {
+            "ok": False,
+            "error": "supervisor has no attached page session",
+        }
+        _patch_supervisor(monkeypatch, sup)
+        called = {"subprocess": False}
+
+        def _fake_subprocess(*a, **kw):
+            called["subprocess"] = True
+            return {"success": True, "data": {"result": "fallback"}}
+
+        monkeypatch.setattr(bt, "_run_browser_command", _fake_subprocess)
+        json.loads(bt._browser_eval("1+1"))
+        assert called["subprocess"] is True
+
+
+# ---------------------------------------------------------------------------
+# Response shaping: CDPSupervisor.evaluate_runtime
+# ---------------------------------------------------------------------------
+
+
+def _make_supervisor_with_cdp(cdp_response):
+    """Return a bare CDPSupervisor whose ``_cdp`` coroutine yields ``cdp_response``.
+
+    ``__init__`` is skipped, so no WebSocket is opened; only the attributes
+    that ``evaluate_runtime`` reads are populated by hand.
+    """
+    import asyncio
+    import threading
+
+    from tools.browser_supervisor import CDPSupervisor
+
+    # Accepts the same arguments evaluate_runtime passes to ``_cdp``.
+    async def _canned_cdp(method, params=None, *, session_id=None, timeout=10.0):
+        return cdp_response
+
+    # asyncio.run_coroutine_threadsafe needs a loop that is actually running,
+    # so spin a real one up on a daemon thread.
+    event_loop = asyncio.new_event_loop()
+
+    def _serve():
+        asyncio.set_event_loop(event_loop)
+        event_loop.run_forever()
+
+    worker = threading.Thread(target=_serve, daemon=True)
+    worker.start()
+
+    stub = object.__new__(CDPSupervisor)
+    stub._state_lock = threading.Lock()
+    stub._active = True
+    stub._page_session_id = "test-session-id"
+    stub._cdp = _canned_cdp  # type: ignore[method-assign]
+    stub._loop = event_loop
+    stub._thread = worker
+    return stub
+
+
+def _stop_supervisor(sup):  # teardown counterpart of _make_supervisor_with_cdp
+    sup._loop.call_soon_threadsafe(sup._loop.stop)  # stop() is scheduled onto the loop's own thread
+    sup._thread.join(timeout=2)  # wait (bounded) for run_forever() to return
+
+
+class TestEvaluateRuntimeResponseShaping:
+    """CDPSupervisor.evaluate_runtime decodes the Runtime.evaluate response correctly."""
+
+    def test_primitive_value(self) -> None:
+        sup = _make_supervisor_with_cdp({
+            "id": 1,
+            "result": {"result": {"type": "number", "value": 42}},
+        })
+        try:
+            out = sup.evaluate_runtime("1 + 41")
+            assert out == {"ok": True, "result": 42, "result_type": "number"}
+        finally:
+            _stop_supervisor(sup)
+
+    def test_object_value_returned_by_value(self) -> None:
+        sup = _make_supervisor_with_cdp({
+            "id": 1,
+            "result": {
+                "result": {
+                    "type": "object",
+                    "value": {"foo": "bar", "n": 7},
+                }
+            },
+        })
+        try:
+            out = sup.evaluate_runtime('({foo:"bar", n:7})')
+            assert out["ok"] is True
+            assert out["result"] == {"foo": "bar", "n": 7}
+            assert out["result_type"] == "object"
+        finally:
+            _stop_supervisor(sup)
+
+    def test_undefined_value(self) -> None:
+        sup = _make_supervisor_with_cdp({
+            "id": 1,
+            "result": {"result": {"type": "undefined"}},
+        })
+        try:
+            out = sup.evaluate_runtime("undefined")
+            assert out == {"ok": True, "result": None, "result_type": "undefined"}
+        finally:
+            _stop_supervisor(sup)
+
+    def test_dom_node_returns_description(self) -> None:
+        """A reply without a ``value`` key falls back to its ``description`` string (mocked reply)."""
+        sup = _make_supervisor_with_cdp({
+            "id": 1,
+            "result": {
+                "result": {
+                    "type": "object",
+                    "subtype": "node",
+                    "description": "div#main.app",
+                    # No 'value' key — returnByValue couldn't serialize it.
+                }
+            },
+        })
+        try:
+            out = sup.evaluate_runtime("document.querySelector('#main')")
+            assert out["ok"] is True
+            assert out["result"] == "div#main.app"
+            assert out["result_type"] == "object"
+        finally:
+            _stop_supervisor(sup)
+
+    def test_js_exception_returns_error(self) -> None:
+        sup = _make_supervisor_with_cdp({
+            "id": 1,
+            "result": {
+                "result": {"type": "undefined"},
+                "exceptionDetails": {
+                    "text": "Uncaught",
+                    "exception": {
+                        "description": "ReferenceError: foo is not defined",
+                    },
+                },
+            },
+        })
+        try:
+            out = sup.evaluate_runtime("foo.bar")
+            assert out["ok"] is False
+            assert "ReferenceError" in out["error"]
+        finally:
+            _stop_supervisor(sup)
+
+    def test_inactive_supervisor_returns_error_without_dispatch(self) -> None:
+        """Inactive supervisor short-circuits before even touching the loop."""
+        import threading
+        from tools.browser_supervisor import CDPSupervisor
+
+        sup = object.__new__(CDPSupervisor)
+        sup._state_lock = threading.Lock()
+        sup._active = False  # ← key
+        sup._page_session_id = None
+        sup._loop = None
+
+        out = sup.evaluate_runtime("1+1")
+        assert out["ok"] is False
+        # Either "loop is not running" or "is not active" is acceptable —
+        # both are caught by the supervisor-side error branch in _browser_eval.
+        assert "supervisor" in out["error"].lower()
+
+    def test_no_session_attached_returns_error(self) -> None:
+        import asyncio
+        import threading
+        from tools.browser_supervisor import CDPSupervisor
+
+        sup = object.__new__(CDPSupervisor)
+        sup._state_lock = threading.Lock()
+        sup._active = True
+        sup._page_session_id = None  # ← attach hasn't happened yet
+
+        loop = asyncio.new_event_loop()
+        thread = threading.Thread(
+            target=lambda: (asyncio.set_event_loop(loop), loop.run_forever()),
+            daemon=True,
+        )
+        thread.start()
+        sup._loop = loop
+        try:
+            out = sup.evaluate_runtime("1+1")
+            assert out["ok"] is False
+            assert "session" in out["error"].lower()
+        finally:
+            loop.call_soon_threadsafe(loop.stop)
+            thread.join(timeout=2)
diff --git a/tests/tools/test_browser_supervisor.py b/tests/tools/test_browser_supervisor.py
index e332aec43f9..360fec53a04 100644
--- a/tests/tools/test_browser_supervisor.py
+++ b/tests/tools/test_browser_supervisor.py
@@ -561,3 +561,80 @@ def test_bridge_captures_prompt_and_returns_reply_text(chrome_cdp, supervisor_re
 
     value = asyncio.run(nav_and_read())
     assert value == "AGENT-SUPPLIED-REPLY", f"expected AGENT-SUPPLIED-REPLY, got {value!r}"
+
+
+def test_evaluate_runtime_primitive(chrome_cdp, supervisor_registry):
+    """A numeric expression round-trips over the supervisor's live WebSocket."""
+    ws_url, _ = chrome_cdp
+    sup = supervisor_registry.get_or_start(task_id="pytest-eval-1", cdp_url=ws_url)
+
+    # evaluate_runtime needs an attached page, so touch one first.
+    _fire_on_page(ws_url, "void 0")
+    time.sleep(0.5)
+
+    reply = sup.evaluate_runtime("1 + 41")
+    assert reply["ok"] is True
+    assert reply["result"] == 42
+    assert reply["result_type"] == "number"
+
+
+def test_evaluate_runtime_object(chrome_cdp, supervisor_registry):
+    """With returnByValue=True a plain JS object arrives as a Python dict."""
+    ws_url, _ = chrome_cdp
+    sup = supervisor_registry.get_or_start(task_id="pytest-eval-2", cdp_url=ws_url)
+
+    _fire_on_page(ws_url, "void 0")
+    time.sleep(0.5)
+
+    reply = sup.evaluate_runtime('({foo: "bar", n: 7})')
+    assert reply["ok"] is True
+    assert reply["result"] == {"foo": "bar", "n": 7}
+    assert reply["result_type"] == "object"
+
+
+def test_evaluate_runtime_js_exception(chrome_cdp, supervisor_registry):
+    """A throwing expression yields ok=False carrying the exception message."""
+    ws_url, _ = chrome_cdp
+    sup = supervisor_registry.get_or_start(task_id="pytest-eval-3", cdp_url=ws_url)
+
+    _fire_on_page(ws_url, "void 0")
+    time.sleep(0.5)
+
+    reply = sup.evaluate_runtime("nonExistentVar.nope")
+    assert reply["ok"] is False
+    assert any(marker in reply["error"] for marker in ("ReferenceError", "not defined"))
+
+
+def test_evaluate_runtime_dom_node_returns_empty_object(chrome_cdp, supervisor_registry):
+    """A DOM node evaluated with returnByValue=true arrives as ``{}`` (Chrome quirk).
+
+    DOM nodes cannot be deeply JSON-serialized, and the DevTools console
+    behaves the same way for this expression.  The contract is pinned here
+    so that a later "fix" (for instance returnByValue=false combined with
+    DOM.describeNode) cannot silently break callers that rely on the
+    current shape.
+    """
+    ws_url, _ = chrome_cdp
+    sup = supervisor_registry.get_or_start(task_id="pytest-eval-4", cdp_url=ws_url)
+
+    _fire_on_page(ws_url, "void 0")
+    time.sleep(0.5)
+
+    reply = sup.evaluate_runtime("document.querySelector('h1')")
+    assert reply["ok"] is True
+    assert reply["result_type"] == "object"
+    # returnByValue cannot deep-serialize a DOM node, hence the empty dict.
+    assert reply["result"] == {}
+
+
+def test_evaluate_runtime_unserializable_value(chrome_cdp, supervisor_registry):
+    """A value with no JSON form (here ``Infinity``) comes back as its string form."""
+    ws_url, _ = chrome_cdp
+    sup = supervisor_registry.get_or_start(task_id="pytest-eval-5", cdp_url=ws_url)
+
+    _fire_on_page(ws_url, "void 0")
+    time.sleep(0.5)
+
+    reply = sup.evaluate_runtime("Infinity")
+    assert reply["ok"] is True
+    assert reply["result"] == "Infinity"
diff --git a/tools/browser_supervisor.py b/tools/browser_supervisor.py
index db0b1e29909..371210350ff 100644
--- a/tools/browser_supervisor.py
+++ b/tools/browser_supervisor.py
@@ -457,6 +457,89 @@ class CDPSupervisor:
             return {"ok": False, "error": f"{type(e).__name__}: {e}"}
         return {"ok": True, "dialog": snapshot_copy.to_dict()}
 
+    def evaluate_runtime(
+        self,
+        expression: str,
+        *,
+        return_by_value: bool = True,
+        await_promise: bool = True,
+        timeout: float = 10.0,
+    ) -> Dict[str, Any]:
+        """Evaluate ``expression`` in the page's Runtime context over the live WS.
+
+        Reuses the supervisor's already-connected WebSocket — zero subprocess
+        startup cost vs the agent-browser CLI ``eval`` command (which does
+        fork+exec+Node-startup+CDP-setup on every call).
+
+        Returns ``{"ok": True, "result": <value>, "result_type": "<JS type>"}``
+        on success, or ``{"ok": False, "error": "..."}`` on failure.  Precondition
+        failures (loop down, inactive, no page session) start with ``supervisor``;
+        JS exceptions start with the CDP exception text.
+
+        ``return_by_value=True`` asks the browser to JSON-serialize the result.
+        When the reply carries no ``value`` the browser's ``description`` (or
+        ``unserializableValue``) string is returned in ``result`` instead.
+        """
+        loop = self._loop
+        if loop is None or not loop.is_running():
+            return {"ok": False, "error": "supervisor loop is not running"}
+
+        with self._state_lock:
+            if not self._active:
+                return {"ok": False, "error": "supervisor is not active"}
+            session_id = self._page_session_id
+
+        if not session_id:
+            return {"ok": False, "error": "supervisor has no attached page session"}
+
+        async def _do_eval() -> Dict[str, Any]:
+            return await self._cdp(
+                "Runtime.evaluate",
+                {
+                    "expression": expression,
+                    "returnByValue": return_by_value,
+                    "awaitPromise": await_promise,
+                    # Some APIs (clipboard, fullscreen) require user activation.
+                    "userGesture": True,
+                },
+                session_id=session_id,
+                timeout=timeout,
+            )
+
+        fut = None
+        try:
+            fut = asyncio.run_coroutine_threadsafe(_do_eval(), loop)
+            response = fut.result(timeout=timeout + 1)
+        except Exception as exc:
+            if fut is not None:
+                fut.cancel()  # stop an eval that outlived the caller's wait
+            return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
+
+        # Runtime.evaluate reply: {"id": N, "result": {"result": {"type", "value", ...},
+        #                          "exceptionDetails": {...}  (only on error)}}
+        result_payload = response.get("result", {}) if isinstance(response, dict) else {}
+        exception_details = result_payload.get("exceptionDetails")
+        if exception_details:
+            exc_text = exception_details.get("text") or "JavaScript exception"
+            exc_obj = exception_details.get("exception") or {}
+            description = exc_obj.get("description")
+            if description:
+                exc_text = f"{exc_text}: {description}"
+            return {"ok": False, "error": exc_text}
+
+        result_obj = result_payload.get("result", {})
+        result_type = result_obj.get("type", "undefined")
+
+        if "value" in result_obj:
+            value = result_obj["value"]
+        elif result_type == "undefined":
+            value = None
+        else:
+            # No serialized value: fall back to the browser's string form.
+            value = result_obj.get("description") or result_obj.get("unserializableValue")
+
+        return {"ok": True, "result": value, "result_type": result_type}
+
     # ── Supervisor loop internals ────────────────────────────────────────────
 
     def _thread_main(self) -> None:
diff --git a/tools/browser_tool.py b/tools/browser_tool.py
index 084c4d3d31e..b1986f7b64b 100644
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -2671,6 +2671,53 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
         return _camofox_eval(expression, task_id)
 
     effective_task_id = _last_session_key(task_id or "default")
+
+    # --- Fast path: route through the supervisor's persistent CDP WS ---------
+    # When a CDPSupervisor is alive for this task_id, ``Runtime.evaluate`` runs
+    # on the already-connected WebSocket instead of spawning ``agent-browser
+    # eval``.  Supervisor-side failures fall through to the subprocess path.
+    def _reject_constant(token):
+        # NaN / Infinity are not valid JSON — leave such results as strings.
+        raise ValueError(token)
+
+    try:
+        from tools.browser_supervisor import SUPERVISOR_REGISTRY  # type: ignore[import-not-found]
+        supervisor = SUPERVISOR_REGISTRY.get(effective_task_id)
+        if supervisor is not None:
+            sup_result = supervisor.evaluate_runtime(expression)
+            if sup_result.get("ok"):
+                # Match the agent-browser path: if the value is a JSON string,
+                # parse it so the model gets structured data.
+                parsed = sup_result.get("result")
+                if isinstance(parsed, str):
+                    try:
+                        parsed = json.loads(parsed, parse_constant=_reject_constant)
+                    except ValueError:  # includes json.JSONDecodeError
+                        pass  # keep as string
+                response = {
+                    "success": True,
+                    "result": parsed,
+                    "result_type": type(parsed).__name__,
+                    "method": "cdp_supervisor",
+                }
+                return json.dumps(response, ensure_ascii=False, default=str)
+            err = sup_result.get("error") or "evaluate_runtime failed"
+            # Supervisor-side errors always start with "supervisor "; JS
+            # exceptions start with the CDP exception text.  Matching the prefix
+            # (not a substring) keeps a JS error that merely mentions the word
+            # from being re-run, side effects included, via the subprocess path.
+            if not err.lower().startswith("supervisor "):
+                return json.dumps({"success": False, "error": err}, ensure_ascii=False)
+            logger.debug(
+                "browser_eval: supervisor path unavailable (%s), falling back to subprocess",
+                err,
+            )
+    except ImportError:
+        pass
+    except Exception as exc:  # pragma: no cover — defensive
+        logger.debug("browser_eval: supervisor path errored (%s), falling back", exc)
+
+    # --- Fallback: agent-browser CLI subprocess (original path) -------------
     result = _run_browser_command(effective_task_id, "eval", [expression])
 
     if not result.get("success"):
diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md
index c078ed49769..2ae5e2b5aa4 100644
--- a/website/docs/user-guide/features/browser.md
+++ b/website/docs/user-guide/features/browser.md
@@ -423,6 +423,15 @@ Check the browser console for any JavaScript errors
 
 Use `clear=True` to clear the console after reading, so subsequent calls only show new messages.
 
+`browser_console` also evaluates JavaScript when called with an `expression` argument. As in the DevTools console, the result comes back parsed: JSON-serialized objects become dicts, and primitive values stay primitive.
+
+```
+browser_console(expression="document.querySelector('h1').textContent")
+browser_console(expression="JSON.stringify(performance.timing)")
+```
+
+When a CDP supervisor is active for the current session (typical for any session that has run `browser_navigate` against a CDP-capable backend), evaluation runs over the supervisor's persistent WebSocket, with no subprocess startup cost. Otherwise it falls through to the standard agent-browser CLI path. Both paths are intended to return the same results (the fast path additionally tags its response with `"method": "cdp_supervisor"`); the main difference is latency.
+
 ### `browser_cdp`
 
 Raw Chrome DevTools Protocol passthrough — the escape hatch for browser operations not covered by the other tools. Use for native dialog handling, iframe-scoped evaluation, cookie/network control, or any CDP verb the agent needs.