perf(browser): route browser_console eval through supervisor's persistent CDP WS (180x faster) (#23226)

Adds CDPSupervisor.evaluate_runtime() and wires it into _browser_eval as a
fast path when a supervisor is alive for the current task_id. Replaces the
~180ms agent-browser subprocess fork+exec+Node-startup hop with a ~1ms
Runtime.evaluate over the supervisor's already-connected WebSocket.

Falls through to the existing agent-browser CLI path when no supervisor is
running (e.g. backends without CDP, or before the first browser_navigate
attaches one), so behaviour is unchanged where it can't apply.

JS-side exceptions surface directly without falling through to the
subprocess (the subprocess would just re-raise the same error, slower);
supervisor-side failures (loop down, no session) fall through cleanly.

Benchmark — 30 iterations of `1 + 1` against headless Chrome:
  supervisor WS              mean=  0.96ms  median=  0.91ms
  agent-browser subprocess   mean=179.35ms  median=167.73ms
  → 187x speedup (mean)

Tests: 14 unit tests (mocked supervisor + response-shape coverage), 5
real-Chrome e2e tests in test_browser_supervisor.py (gated on Chrome
being installed). Browser test suite: 355 passed, 1 skipped.
This commit is contained in:
Teknium 2026-05-10 07:37:55 -07:00 committed by GitHub
parent 08c5b35a73
commit d4b26df897
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 717 additions and 0 deletions

View file

@ -2671,6 +2671,53 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
return _camofox_eval(expression, task_id)
effective_task_id = _last_session_key(task_id or "default")
# --- Fast path: route through the supervisor's persistent CDP WS ---------
# When a CDPSupervisor is alive for this task_id, ``Runtime.evaluate`` runs
# on the already-connected WebSocket — zero subprocess startup cost vs
# spawning an ``agent-browser eval`` CLI process. Falls through to the
# subprocess path on supervisor-side failures (JS-side exceptions are
# returned directly), so behaviour is unchanged when no supervisor is
# running (e.g. plain agent-browser without a CDP backend).
try:
from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found]
supervisor = SUPERVISOR_REGISTRY.get(effective_task_id)
if supervisor is not None:
sup_result = supervisor.evaluate_runtime(expression)
if sup_result.get("ok"):
raw_result = sup_result.get("result")
# Match the agent-browser path: if the value is a JSON string,
# parse it so the model gets structured data.
parsed = raw_result
if isinstance(raw_result, str):
try:
parsed = json.loads(raw_result)
except (json.JSONDecodeError, ValueError):
pass # keep as string
response = {
"success": True,
"result": parsed,
"result_type": type(parsed).__name__,
"method": "cdp_supervisor",
}
return json.dumps(response, ensure_ascii=False, default=str)
# JS exception is a real failure — surface it instead of falling
# through to the subprocess path (which would just re-run and
# produce the same exception, but slower).
err = sup_result.get("error") or "evaluate_runtime failed"
if "supervisor" not in err.lower():
# Real JS-side error — return it.
return json.dumps({"success": False, "error": err}, ensure_ascii=False)
# Supervisor-side failure (loop down, no session) — fall through.
logger.debug(
"browser_eval: supervisor path unavailable (%s), falling back to subprocess",
err,
)
except ImportError:
pass
except Exception as exc: # pragma: no cover — defensive
logger.debug("browser_eval: supervisor path errored (%s), falling back", exc)
# --- Fallback: agent-browser CLI subprocess (original path) -------------
result = _run_browser_command(effective_task_id, "eval", [expression])
if not result.get("success"):