mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-20 05:01:30 +00:00
perf(browser): route browser_console eval through supervisor's persistent CDP WS (180x faster) (#23226)
Adds CDPSupervisor.evaluate_runtime() and wires it into _browser_eval as a fast path when a supervisor is alive for the current task_id. Replaces the ~180ms agent-browser subprocess fork+exec+Node-startup hop with a ~1ms Runtime.evaluate over the supervisor's already-connected WebSocket.

Falls through to the existing agent-browser CLI path when no supervisor is running (e.g. backends without CDP, or before the first browser_navigate attaches one), so behaviour is unchanged where the fast path can't apply. JS-side exceptions surface directly without falling through to the subprocess (the subprocess would just re-raise the same error, slower); supervisor-side failures (loop down, no session) fall through cleanly.

Benchmark — 30 iterations of `1 + 1` against headless Chrome:
- supervisor WS: mean = 0.96ms, median = 0.91ms
- agent-browser subprocess: mean = 179.35ms, median = 167.73ms
→ 187x speedup (mean)

Tests: 14 unit tests (mocked supervisor + response-shape coverage), 5 real-Chrome e2e tests in test_browser_supervisor.py (gated on Chrome being installed). Browser test suite: 355 passed, 1 skipped.
This commit is contained in:
parent
08c5b35a73
commit
d4b26df897
6 changed files with 717 additions and 0 deletions
|
|
@ -457,6 +457,89 @@ class CDPSupervisor:
|
|||
return {"ok": False, "error": f"{type(e).__name__}: {e}"}
|
||||
return {"ok": True, "dialog": snapshot_copy.to_dict()}
|
||||
|
||||
def evaluate_runtime(
    self,
    expression: str,
    *,
    return_by_value: bool = True,
    await_promise: bool = True,
    timeout: float = 10.0,
) -> Dict[str, Any]:
    """Evaluate ``expression`` in the page's Runtime context over the live WS.

    Sends a CDP ``Runtime.evaluate`` command through ``self._cdp`` on the
    supervisor's event loop and blocks the calling thread until the reply
    arrives (at most ``timeout + 1`` seconds). This avoids the per-call
    fork+exec+Node-startup+CDP-setup cost of the agent-browser CLI ``eval``
    command.

    Args:
        expression: JavaScript source to evaluate.
        return_by_value: Forwarded as CDP ``returnByValue``; asks the browser
            to JSON-serialize the result before sending it back.
        await_promise: Forwarded as CDP ``awaitPromise``.
        timeout: Seconds allowed for the CDP call. The wait on the calling
            thread gets one extra second so the loop-side timeout normally
            fires first.

    Returns:
        ``{"ok": True, "result": <value>, "result_type": "<CDP type>"}`` on
        success. When the browser sent no ``value`` (e.g. functions or other
        non-serializable objects), ``result`` is the browser's
        ``description`` / ``unserializableValue`` string, and ``None`` for
        ``undefined``.

        ``{"ok": False, "error": "..."}`` when the supervisor is unavailable,
        the call fails or times out, or the evaluated JavaScript threw.
        This method does not raise for those conditions.
    """

    def _as_dict(obj: Any) -> Dict[str, Any]:
        # CDP payloads may carry ``null`` (or an unexpected type) where an
        # object is expected; treat anything that is not a dict as empty
        # instead of raising AttributeError on ``.get``.
        return obj if isinstance(obj, dict) else {}

    loop = self._loop
    if loop is None or not loop.is_running():
        return {"ok": False, "error": "supervisor loop is not running"}

    # Snapshot the session id under the lock so the activity check and the
    # read are consistent with each other.
    with self._state_lock:
        if not self._active:
            return {"ok": False, "error": "supervisor is not active"}
        session_id = self._page_session_id

    if not session_id:
        return {"ok": False, "error": "supervisor has no attached page session"}

    async def _do_eval() -> Dict[str, Any]:
        return await self._cdp(
            "Runtime.evaluate",
            {
                "expression": expression,
                "returnByValue": return_by_value,
                "awaitPromise": await_promise,
                # userGesture matters for things like clipboard / fullscreen
                # APIs that require a user-activation context.
                "userGesture": True,
            },
            session_id=session_id,
            timeout=timeout,
        )

    coro = _do_eval()
    try:
        fut = asyncio.run_coroutine_threadsafe(coro, loop)
    except Exception as exc:
        # Scheduling failed (e.g. the loop was closed after the check above).
        # Close the coroutine so it does not linger as "never awaited".
        coro.close()
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}

    try:
        response = fut.result(timeout=timeout + 1)
    except Exception as exc:
        # On a timeout the coroutine is still pending on the supervisor loop;
        # cancel it so abandoned evaluations do not pile up there. This is a
        # no-op when the future already finished with an exception.
        fut.cancel()
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}

    # Runtime.evaluate response shape:
    #   {"id": N, "result": {"result": {"type": "...", "value": ..., ...},
    #                        "exceptionDetails": {...} (only on error)}}
    # NOTE(review): assumes ``_cdp`` raises on protocol-level error replies
    # rather than returning them — confirm against ``_cdp``.
    payload = _as_dict(_as_dict(response).get("result"))

    exception_details = payload.get("exceptionDetails")
    if exception_details:
        # Surface the JS-side exception with a clean message.
        details = _as_dict(exception_details)
        exc_text = details.get("text") or "JavaScript exception"
        description = _as_dict(details.get("exception")).get("description")
        if description:
            exc_text = f"{exc_text}: {description}"
        return {"ok": False, "error": exc_text}

    result_obj = _as_dict(payload.get("result"))
    result_type = result_obj.get("type", "undefined")

    if "value" in result_obj:
        value = result_obj["value"]
    elif result_type == "undefined":
        value = None
    else:
        # Non-serializable (functions, DOM nodes, etc.) — return the
        # browser's string description so the model gets *something*.
        value = result_obj.get("description") or result_obj.get("unserializableValue")

    return {"ok": True, "result": value, "result_type": result_type}
|
||||
|
||||
# ── Supervisor loop internals ────────────────────────────────────────────
|
||||
|
||||
def _thread_main(self) -> None:
|
||||
|
|
|
|||
|
|
@ -2671,6 +2671,53 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
|
|||
return _camofox_eval(expression, task_id)
|
||||
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
|
||||
# --- Fast path: route through the supervisor's persistent CDP WS ---------
|
||||
# When a CDPSupervisor is alive for this task_id, ``Runtime.evaluate`` runs
|
||||
# on the already-connected WebSocket — zero subprocess startup cost vs
|
||||
# spawning an ``agent-browser eval`` CLI process. Falls through to the
|
||||
# subprocess path on any error so behaviour is unchanged when no
|
||||
# supervisor is running (e.g. plain agent-browser without a CDP backend).
|
||||
try:
|
||||
from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found]
|
||||
supervisor = SUPERVISOR_REGISTRY.get(effective_task_id)
|
||||
if supervisor is not None:
|
||||
sup_result = supervisor.evaluate_runtime(expression)
|
||||
if sup_result.get("ok"):
|
||||
raw_result = sup_result.get("result")
|
||||
# Match the agent-browser path: if the value is a JSON string,
|
||||
# parse it so the model gets structured data.
|
||||
parsed = raw_result
|
||||
if isinstance(raw_result, str):
|
||||
try:
|
||||
parsed = json.loads(raw_result)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
pass # keep as string
|
||||
response = {
|
||||
"success": True,
|
||||
"result": parsed,
|
||||
"result_type": type(parsed).__name__,
|
||||
"method": "cdp_supervisor",
|
||||
}
|
||||
return json.dumps(response, ensure_ascii=False, default=str)
|
||||
# JS exception is a real failure — surface it instead of falling
|
||||
# through to the subprocess path (which would just re-run and
|
||||
# produce the same exception, but slower).
|
||||
err = sup_result.get("error") or "evaluate_runtime failed"
|
||||
if "supervisor" not in err.lower():
|
||||
# Real JS-side error — return it.
|
||||
return json.dumps({"success": False, "error": err}, ensure_ascii=False)
|
||||
# Supervisor-side failure (loop down, no session) — fall through.
|
||||
logger.debug(
|
||||
"browser_eval: supervisor path unavailable (%s), falling back to subprocess",
|
||||
err,
|
||||
)
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as exc: # pragma: no cover — defensive
|
||||
logger.debug("browser_eval: supervisor path errored (%s), falling back", exc)
|
||||
|
||||
# --- Fallback: agent-browser CLI subprocess (original path) -------------
|
||||
result = _run_browser_command(effective_task_id, "eval", [expression])
|
||||
|
||||
if not result.get("success"):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue