From 395dbcc873c85b8873f4e36ff91b87c739bed242 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Mon, 4 May 2026 19:52:14 +0530 Subject: [PATCH] feat(browser): add Lightpanda engine support with automatic Chrome fallback Add Lightpanda as an optional browser engine for local mode. Lightpanda is a headless browser built from scratch in Zig -- faster navigation than Chrome with significantly less memory. One config line to enable: browser: engine: lightpanda New functions in browser_tool.py: - _get_browser_engine() -- config/env reader with validation + caching - _should_inject_engine() -- only inject in local non-cloud mode - _needs_lightpanda_fallback() -- detect empty/failed LP results - _chrome_fallback_screenshot() -- temporary Chrome session for screenshots - Engine injection in _run_browser_command (--engine flag) - browser_vision pre-routes screenshots to Chrome when engine=lightpanda Config: - browser.engine in DEFAULT_CONFIG (auto/lightpanda/chrome) - AGENT_BROWSER_ENGINE in OPTIONAL_ENV_VARS - /browser status shows engine info in local mode Rebased from PR #7144 onto current main. All existing code preserved -- pure additions only (+520/-2). 25 new tests + 81 total browser tests pass (0 failures). --- .env.example | 9 + cli.py | 16 +- hermes_cli/config.py | 16 + tests/tools/test_browser_lightpanda.py | 363 ++++++++++++++++++++ tools/browser_tool.py | 440 +++++++++++++++++++++---- 5 files changed, 770 insertions(+), 74 deletions(-) create mode 100644 tests/tools/test_browser_lightpanda.py diff --git a/.env.example b/.env.example index 589978e6b5..6cd9c30239 100644 --- a/.env.example +++ b/.env.example @@ -244,6 +244,15 @@ BROWSERBASE_PROXIES=true # Uses custom Chromium build to avoid bot detection altogether BROWSERBASE_ADVANCED_STEALTH=false +# Browser engine for local mode (default: auto = Chrome) +# "auto" — use Chrome (don't pass --engine flag) +# "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots) +# "chrome" — explicitly request Chrome +# Requires agent-browser v0.25.3+. Lightpanda commands that fail or return +# empty results are automatically retried with Chrome. +# Also configurable via browser.engine in config.yaml. +# AGENT_BROWSER_ENGINE=auto + # Browser session timeout in seconds (default: 300) # Sessions are cleaned up after this duration of inactivity BROWSER_SESSION_TIMEOUT=300 diff --git a/cli.py b/cli.py index 3806dc4a3a..a7245d50b3 100644 --- a/cli.py +++ b/cli.py @@ -298,6 +298,7 @@ def load_cli_config() -> Dict[str, Any]: "browser": { "inactivity_timeout": 120, # Auto-cleanup inactive browser sessions after 2 min "record_sessions": False, # Auto-record browser sessions as WebM videos + "engine": "auto", # Browser engine: auto (Chrome), lightpanda, chrome }, "compression": { "enabled": True, # Auto-compress when approaching context limit @@ -7131,7 +7132,20 @@ class HermesCLI: if provider is not None: print(f"🌐 Browser: {provider.provider_name()} (cloud)") else: - print("🌐 Browser: local headless Chromium (agent-browser)") + # Show engine info for local mode + try: + from tools.browser_tool import _get_browser_engine + engine = _get_browser_engine() + except Exception: + engine = "auto" + if engine == "lightpanda": + print("🌐 Browser: local Lightpanda (agent-browser --engine lightpanda)") + print(" ⚡ Lightpanda: faster navigation, no screenshot support") + print(" Automatic Chrome fallback for screenshots and failed commands") + elif engine == "chrome": + print("🌐 Browser: local headless Chrome (agent-browser --engine chrome)") + else: + print("🌐 Browser: local headless Chromium (agent-browser)") print() print(" /browser connect — connect to your live Chrome") print(" /browser disconnect — revert to default") diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 275ce387d5..030421c90c 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -550,6 +550,13 @@ DEFAULT_CONFIG = { "command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.) "record_sessions": False, # Auto-record browser sessions as WebM videos "allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.) + # Browser engine for local mode. Passed as ``--engine `` to + # agent-browser v0.25.3+. + # "auto" — use Chrome (default, don't pass --engine at all) + # "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots) + # "chrome" — explicitly request Chrome + # Also settable via AGENT_BROWSER_ENGINE env var. + "engine": "auto", "auto_local_for_private_urls": True, # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud "cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome # CDP supervisor — dialog + frame detection via a persistent WebSocket. @@ -1827,6 +1834,15 @@ OPTIONAL_ENV_VARS = { "password": False, "category": "tool", }, + "AGENT_BROWSER_ENGINE": { + "description": "Browser engine for local mode: auto (default Chrome), lightpanda (faster, no screenshots), chrome", + "prompt": "Browser engine (auto/lightpanda/chrome)", + "url": "https://github.com/vercel-labs/agent-browser", + "tools": ["browser_navigate", "browser_snapshot", "browser_click", "browser_vision"], + "password": False, + "category": "tool", + "advanced": True, + }, "CAMOFOX_URL": { "description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)", "prompt": "Camofox server URL", diff --git a/tests/tools/test_browser_lightpanda.py b/tests/tools/test_browser_lightpanda.py new file mode 100644 index 0000000000..b65d1e7e52 --- /dev/null +++ b/tests/tools/test_browser_lightpanda.py @@ -0,0 +1,363 @@ +"""Tests for Lightpanda engine support in browser_tool.py.""" + +import json +import os +from unittest.mock import MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _reset_engine_cache(): + """Reset the module-level engine cache so tests start clean.""" + import tools.browser_tool as bt + bt._cached_browser_engine = None + bt._browser_engine_resolved = False + + +@pytest.fixture(autouse=True) +def _clean_engine_cache(): + """Reset engine cache before and after each test.""" + _reset_engine_cache() + yield + _reset_engine_cache() + + +# --------------------------------------------------------------------------- +# _get_browser_engine +# --------------------------------------------------------------------------- + +class TestGetBrowserEngine: + """Test engine resolution from config and env vars.""" + + def test_default_is_auto(self): + """With no config or env var, engine defaults to 'auto'.""" + from tools.browser_tool import _get_browser_engine + with patch.dict(os.environ, {}, clear=False): + os.environ.pop("AGENT_BROWSER_ENGINE", None) + with patch("hermes_cli.config.read_raw_config", return_value={}): + assert _get_browser_engine() == "auto" + + def test_config_lightpanda(self): + """Config browser.engine = 'lightpanda' is respected.""" + from tools.browser_tool import _get_browser_engine + cfg = {"browser": {"engine": "lightpanda"}} + with patch("hermes_cli.config.read_raw_config", return_value=cfg): + assert _get_browser_engine() == "lightpanda" + + def test_config_chrome(self): + """Config browser.engine = 'chrome' is respected.""" + from tools.browser_tool import _get_browser_engine + cfg = {"browser": {"engine": "chrome"}} + with patch("hermes_cli.config.read_raw_config", return_value=cfg): + assert _get_browser_engine() == "chrome" + + def test_env_var_fallback(self): + """AGENT_BROWSER_ENGINE env var is used when config has no engine key.""" + from tools.browser_tool import _get_browser_engine + with patch.dict(os.environ, {"AGENT_BROWSER_ENGINE": "lightpanda"}): + with patch("hermes_cli.config.read_raw_config", return_value={}): + assert _get_browser_engine() == "lightpanda" + + def test_config_takes_priority_over_env(self): + """Config value wins over env var.""" + from tools.browser_tool import _get_browser_engine + cfg = {"browser": {"engine": "chrome"}} + with patch.dict(os.environ, {"AGENT_BROWSER_ENGINE": "lightpanda"}): + with patch("hermes_cli.config.read_raw_config", return_value=cfg): + assert _get_browser_engine() == "chrome" + + def test_value_is_lowercased(self): + """Engine value is normalized to lowercase.""" + from tools.browser_tool import _get_browser_engine + cfg = {"browser": {"engine": "Lightpanda"}} + with patch("hermes_cli.config.read_raw_config", return_value=cfg): + assert _get_browser_engine() == "lightpanda" + + def test_invalid_engine_falls_back_to_auto(self): + """Unknown engine values are rejected and fall back to 'auto'.""" + from tools.browser_tool import _get_browser_engine + cfg = {"browser": {"engine": "firefox"}} + with patch("hermes_cli.config.read_raw_config", return_value=cfg): + assert _get_browser_engine() == "auto" + + def test_caching(self): + """Result is cached — second call doesn't re-read config.""" + from tools.browser_tool import _get_browser_engine + mock_read = MagicMock(return_value={"browser": {"engine": "lightpanda"}}) + with patch("hermes_cli.config.read_raw_config", mock_read): + assert _get_browser_engine() == "lightpanda" + assert _get_browser_engine() == "lightpanda" + mock_read.assert_called_once() + + +# --------------------------------------------------------------------------- +# _should_inject_engine +# --------------------------------------------------------------------------- + +class TestShouldInjectEngine: + """Test whether --engine flag is injected based on mode.""" + + def test_auto_never_injects(self): + from tools.browser_tool import _should_inject_engine + assert _should_inject_engine("auto") is False + + def test_lightpanda_injects_in_local_mode(self): + from tools.browser_tool import _should_inject_engine + with patch("tools.browser_tool._is_camofox_mode", return_value=False), \ + patch("tools.browser_tool._get_cdp_override", return_value=""), \ + patch("tools.browser_tool._get_cloud_provider", return_value=None): + assert _should_inject_engine("lightpanda") is True + + def test_chrome_injects_in_local_mode(self): + from tools.browser_tool import _should_inject_engine + with patch("tools.browser_tool._is_camofox_mode", return_value=False), \ + patch("tools.browser_tool._get_cdp_override", return_value=""), \ + patch("tools.browser_tool._get_cloud_provider", return_value=None): + assert _should_inject_engine("chrome") is True + + def test_no_inject_in_camofox_mode(self): + from tools.browser_tool import _should_inject_engine + with patch("tools.browser_tool._is_camofox_mode", return_value=True): + assert _should_inject_engine("lightpanda") is False + + def test_no_inject_with_cdp_override(self): + from tools.browser_tool import _should_inject_engine + with patch("tools.browser_tool._is_camofox_mode", return_value=False), \ + patch("tools.browser_tool._get_cdp_override", return_value="ws://localhost:9222"): + assert _should_inject_engine("lightpanda") is False + + def test_no_inject_with_cloud_provider(self): + from tools.browser_tool import _should_inject_engine + mock_provider = MagicMock() + with patch("tools.browser_tool._is_camofox_mode", return_value=False), \ + patch("tools.browser_tool._get_cdp_override", return_value=""), \ + patch("tools.browser_tool._get_cloud_provider", return_value=mock_provider): + assert _should_inject_engine("lightpanda") is False + + +# --------------------------------------------------------------------------- +# _needs_lightpanda_fallback +# --------------------------------------------------------------------------- + +class TestNeedsLightpandaFallback: + """Test fallback detection for Lightpanda results.""" + + def test_non_lightpanda_never_falls_back(self): + from tools.browser_tool import _needs_lightpanda_fallback + result = {"success": False, "error": "timeout"} + assert _needs_lightpanda_fallback("chrome", "open", result) is False + assert _needs_lightpanda_fallback("auto", "open", result) is False + + def test_failed_command_triggers_fallback(self): + from tools.browser_tool import _needs_lightpanda_fallback + result = {"success": False, "error": "page.goto: Timeout"} + assert _needs_lightpanda_fallback("lightpanda", "open", result) is True + + def test_empty_snapshot_triggers_fallback(self): + from tools.browser_tool import _needs_lightpanda_fallback + result = {"success": True, "data": {"snapshot": ""}} + assert _needs_lightpanda_fallback("lightpanda", "snapshot", result) is True + + def test_short_snapshot_triggers_fallback(self): + from tools.browser_tool import _needs_lightpanda_fallback + result = {"success": True, "data": {"snapshot": "- none"}} + assert _needs_lightpanda_fallback("lightpanda", "snapshot", result) is True + + def test_normal_snapshot_does_not_trigger(self): + from tools.browser_tool import _needs_lightpanda_fallback + result = {"success": True, "data": { + "snapshot": '- heading "Example Domain" [ref=e1]\n- link "Learn more" [ref=e2]' + }} + assert _needs_lightpanda_fallback("lightpanda", "snapshot", result) is False + + def test_small_screenshot_triggers_fallback(self, tmp_path): + from tools.browser_tool import _needs_lightpanda_fallback + # Create a tiny file simulating the Lightpanda placeholder PNG + placeholder = tmp_path / "placeholder.png" + placeholder.write_bytes(b"\x89PNG" + b"\x00" * 2000) # ~2KB + result = {"success": True, "data": {"path": str(placeholder)}} + assert _needs_lightpanda_fallback("lightpanda", "screenshot", result) is True + + def test_actual_placeholder_size_triggers_fallback(self, tmp_path): + from tools.browser_tool import _needs_lightpanda_fallback + # Lightpanda PR #1766 resized the placeholder to 1920x1080 (~17 KB) + placeholder = tmp_path / "placeholder_1920.png" + placeholder.write_bytes(b"\x89PNG" + b"\x00" * 16693) # actual measured: 16697 bytes + result = {"success": True, "data": {"path": str(placeholder)}} + assert _needs_lightpanda_fallback("lightpanda", "screenshot", result) is True + + def test_normal_screenshot_does_not_trigger(self, tmp_path): + from tools.browser_tool import _needs_lightpanda_fallback + # Create a larger file simulating a real Chrome screenshot + real_screenshot = tmp_path / "real.png" + real_screenshot.write_bytes(b"\x89PNG" + b"\x00" * 50_000) # ~50KB + result = {"success": True, "data": {"path": str(real_screenshot)}} + assert _needs_lightpanda_fallback("lightpanda", "screenshot", result) is False + + def test_successful_open_does_not_trigger(self): + from tools.browser_tool import _needs_lightpanda_fallback + result = {"success": True, "data": {"title": "Example", "url": "https://example.com"}} + assert _needs_lightpanda_fallback("lightpanda", "open", result) is False + + def test_close_command_never_triggers_fallback(self): + """Session-management commands like 'close' are not fallback-eligible.""" + from tools.browser_tool import _needs_lightpanda_fallback + result = {"success": False, "error": "session closed"} + assert _needs_lightpanda_fallback("lightpanda", "close", result) is False + + def test_record_command_never_triggers_fallback(self): + """The 'record' command is tied to the engine daemon — not retryable.""" + from tools.browser_tool import _needs_lightpanda_fallback + result = {"success": False, "error": "recording failed"} + assert _needs_lightpanda_fallback("lightpanda", "record", result) is False + + def test_unknown_command_does_not_trigger_fallback(self): + """Commands not in the whitelist should not trigger fallback.""" + from tools.browser_tool import _needs_lightpanda_fallback + result = {"success": False, "error": "nope"} + assert _needs_lightpanda_fallback("lightpanda", "some_future_cmd", result) is False + + +# --------------------------------------------------------------------------- +# Config integration +# --------------------------------------------------------------------------- + +class TestConfigIntegration: + """Verify engine config is in DEFAULT_CONFIG.""" + + def test_engine_in_default_config(self): + from hermes_cli.config import DEFAULT_CONFIG + assert "engine" in DEFAULT_CONFIG["browser"] + assert DEFAULT_CONFIG["browser"]["engine"] == "auto" + + def test_env_var_registered(self): + from hermes_cli.config import OPTIONAL_ENV_VARS + assert "AGENT_BROWSER_ENGINE" in OPTIONAL_ENV_VARS + entry = OPTIONAL_ENV_VARS["AGENT_BROWSER_ENGINE"] + assert entry["category"] == "tool" + assert entry["advanced"] is True + + +# --------------------------------------------------------------------------- +# cleanup_all_browsers resets engine cache +# --------------------------------------------------------------------------- + +class TestCleanupResetsEngineCache: + """Verify cleanup_all_browsers resets engine-related globals.""" + + def test_engine_cache_reset(self): + import tools.browser_tool as bt + # Seed the cache + bt._cached_browser_engine = "lightpanda" + bt._browser_engine_resolved = True + # cleanup should reset them + bt.cleanup_all_browsers() + assert bt._cached_browser_engine is None + assert bt._browser_engine_resolved is False + + +# --------------------------------------------------------------------------- +# _engine_override parameter +# --------------------------------------------------------------------------- + +class TestEngineOverride: + """Verify _engine_override bypasses the cached engine.""" + + @patch("tools.browser_tool._get_session_info") + @patch("tools.browser_tool._find_agent_browser", return_value="/usr/bin/agent-browser") + @patch("tools.browser_tool._is_local_mode", return_value=True) + @patch("tools.browser_tool._chromium_installed", return_value=True) + @patch("tools.browser_tool._get_cloud_provider", return_value=None) + @patch("tools.browser_tool._get_cdp_override", return_value="") + @patch("tools.browser_tool._is_camofox_mode", return_value=False) + def test_override_prevents_engine_injection( + self, _camofox, _cdp, _cloud, _chromium, _local, _find, _session + ): + """When _engine_override='auto', --engine flag is NOT injected.""" + import tools.browser_tool as bt + + # Set the global cache to lightpanda + bt._cached_browser_engine = "lightpanda" + bt._browser_engine_resolved = True + + _session.return_value = {"session_name": "test-sess"} + + # Track the cmd_parts that Popen receives + captured_cmds = [] + mock_proc = MagicMock() + mock_proc.wait.return_value = None + mock_proc.returncode = 0 + + def capture_popen(cmd, **kwargs): + captured_cmds.append(cmd) + return mock_proc + + # We need to mock the file operations too + with patch("subprocess.Popen", side_effect=capture_popen), \ + patch("os.open", return_value=99), \ + patch("os.close"), \ + patch("os.unlink"), \ + patch("os.makedirs"), \ + patch("builtins.open", MagicMock(return_value=MagicMock( + __enter__=MagicMock(return_value=MagicMock(read=MagicMock(return_value='{"success": true, "data": {}}'))), + __exit__=MagicMock(return_value=False), + ))), \ + patch("tools.interrupt.is_interrupted", return_value=False), \ + patch("tools.browser_tool._write_owner_pid"): + bt._run_browser_command("task1", "snapshot", [], _engine_override="auto") + + # Should NOT contain "--engine" since override is "auto" + assert len(captured_cmds) == 1 + assert "--engine" not in captured_cmds[0] + + @patch("tools.browser_tool._get_session_info") + @patch("tools.browser_tool._find_agent_browser", return_value="/usr/bin/agent-browser") + @patch("tools.browser_tool._is_local_mode", return_value=True) + @patch("tools.browser_tool._chromium_installed", return_value=True) + @patch("tools.browser_tool._get_cloud_provider", return_value=None) + @patch("tools.browser_tool._get_cdp_override", return_value="") + @patch("tools.browser_tool._is_camofox_mode", return_value=False) + def test_no_override_uses_cached_engine( + self, _camofox, _cdp, _cloud, _chromium, _local, _find, _session + ): + """Without _engine_override, the cached engine is used.""" + import tools.browser_tool as bt + + bt._cached_browser_engine = "lightpanda" + bt._browser_engine_resolved = True + + _session.return_value = {"session_name": "test-sess"} + + captured_cmds = [] + mock_proc = MagicMock() + mock_proc.wait.return_value = None + mock_proc.returncode = 0 + + def capture_popen(cmd, **kwargs): + captured_cmds.append(cmd) + return mock_proc + + # Return a substantive snapshot so the LP fallback does NOT trigger. + mock_stdout = '{"success": true, "data": {"snapshot": "- heading \\"Hello\\" [ref=e1]", "refs": {"e1": {}}}}' + with patch("subprocess.Popen", side_effect=capture_popen), \ + patch("os.open", return_value=99), \ + patch("os.close"), \ + patch("os.unlink"), \ + patch("os.makedirs"), \ + patch("builtins.open", MagicMock(return_value=MagicMock( + __enter__=MagicMock(return_value=MagicMock(read=MagicMock(return_value=mock_stdout))), + __exit__=MagicMock(return_value=False), + ))), \ + patch("tools.interrupt.is_interrupted", return_value=False), \ + patch("tools.browser_tool._write_owner_pid"): + bt._run_browser_command("task1", "snapshot", []) + + # SHOULD contain "--engine lightpanda" + assert len(captured_cmds) == 1 + assert "--engine" in captured_cmds[0] + engine_idx = captured_cmds[0].index("--engine") + assert captured_cmds[0][engine_idx + 1] == "lightpanda" diff --git a/tools/browser_tool.py b/tools/browser_tool.py index f394e5b2f6..195aa1ee44 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -400,6 +400,11 @@ _cached_allow_private_urls: Optional[bool] = None _cached_agent_browser: Optional[str] = None _agent_browser_resolved = False +# Lightpanda engine support — cached like _get_cloud_provider(). +# agent-browser v0.25.3+ supports ``--engine lightpanda`` natively. +_cached_browser_engine: Optional[str] = None +_browser_engine_resolved = False + def _get_cloud_provider() -> Optional[CloudBrowserProvider]: """Return the configured cloud browser provider, or None for local mode. @@ -489,6 +494,218 @@ _auto_local_for_private_urls_resolved = False _cached_auto_local_for_private_urls: bool = True +def _get_browser_engine() -> str: + """Return the configured browser engine (``auto``, ``lightpanda``, or ``chrome``). + + Reads ``config["browser"]["engine"]`` once and caches the result. + Falls back to the ``AGENT_BROWSER_ENGINE`` env var, then ``auto``. + + ``auto`` means: don't pass ``--engine`` at all (agent-browser defaults to + Chrome). ``lightpanda`` or ``chrome`` are forwarded as + ``--engine `` to agent-browser v0.25.3+. + + Lightpanda is 1.3-5.8x faster on navigation but has no graphical + renderer (no screenshots). + """ + global _cached_browser_engine, _browser_engine_resolved + if _browser_engine_resolved: + return _cached_browser_engine + + _browser_engine_resolved = True + _cached_browser_engine = "auto" # safe default + + # Config file takes priority + try: + from hermes_cli.config import read_raw_config + cfg = read_raw_config() + val = cfg.get("browser", {}).get("engine") + if val and str(val).strip(): + _cached_browser_engine = str(val).strip().lower() + except Exception as e: + logger.debug("Could not read browser.engine from config: %s", e) + + # Fall back to env var (only if config didn't set a value) + if _cached_browser_engine == "auto": + env_val = os.environ.get("AGENT_BROWSER_ENGINE", "").strip().lower() + if env_val: + _cached_browser_engine = env_val + + # Validate: agent-browser only accepts "chrome" and "lightpanda". + _VALID_ENGINES = {"auto", "lightpanda", "chrome"} + if _cached_browser_engine not in _VALID_ENGINES: + logger.warning( + "Unknown browser engine %r (valid: %s), falling back to 'auto'", + _cached_browser_engine, ", ".join(sorted(_VALID_ENGINES)), + ) + _cached_browser_engine = "auto" + + return _cached_browser_engine + + +def _should_inject_engine(engine: str) -> bool: + """Return True when the engine flag should be added to agent-browser commands. + + Only inject ``--engine`` for non-cloud, non-camofox local sessions where + the engine is explicitly set (not ``auto``). + """ + if engine == "auto": + return False + if _is_camofox_mode(): + return False + return _is_local_mode() + + +def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]) -> bool: + """Check if a Lightpanda result should trigger an automatic Chrome fallback. + + Returns True when: + - The engine is lightpanda AND + - The command is fallback-eligible (not close/record) AND + - The command failed, OR + - A snapshot came back empty/suspiciously short, OR + - A screenshot returned but is likely the Lightpanda placeholder PNG + """ + if engine != "lightpanda": + return False + + # Only retry commands where Chrome can meaningfully produce a different + # result. Session-management commands (close, record) are tied to the + # engine's daemon and can't be retried on a different engine. + _FALLBACK_ELIGIBLE = {"open", "snapshot", "screenshot", "eval", "click", + "fill", "scroll", "back", "press", "console", "errors"} + if command not in _FALLBACK_ELIGIBLE: + return False + + # Explicit failure + if not result.get("success"): + return True + + data = result.get("data", {}) + + if command == "snapshot": + snap = data.get("snapshot", "") + # Empty or near-empty snapshots indicate Lightpanda couldn't render + if not snap or len(snap.strip()) < 20: + return True + + if command == "screenshot": + # Lightpanda returns a placeholder PNG with its panda logo. + # Since LP PR #1766 resized it to 1920x1080, the placeholder is + # ~17 KB. Real Chromium screenshots are typically 100 KB+. + path = data.get("path", "") + if path: + try: + size = os.path.getsize(path) + if size < 20480: + logger.debug("Lightpanda screenshot is suspiciously small (%d bytes), " + "triggering Chrome fallback", size) + return True + except OSError: + return True # file doesn't exist or can't be read + + return False + + +def _chrome_fallback_screenshot( + task_id: str, + args: List[str], + timeout: int, +) -> Dict[str, Any]: + """Take a screenshot using a temporary Chrome session. + + When the active session uses Lightpanda, ``--engine chrome`` on the same + session has no effect — the engine is locked at daemon startup. This + helper spins up a **separate** Chrome session, navigates to the same URL + the agent is currently viewing, takes the screenshot, then tears down the + temporary session. + + Returns the screenshot result dict (same shape as ``_run_browser_command``). + """ + import uuid + + # 1. Grab the current URL from the Lightpanda session. + url_result = _run_browser_command(task_id, "eval", ["window.location.href"], timeout=10) + current_url = None + if url_result.get("success"): + current_url = url_result.get("data", {}).get("result", "").strip().strip('"').strip("'") + if not current_url: + logger.warning("Chrome fallback: could not determine current URL from LP session") + return {"success": False, "error": "Chrome fallback failed: could not determine current URL"} + + # 2. Create a temporary Chrome session (bypasses _get_session_info's cache). + tmp_session = f"h_cfb_{uuid.uuid4().hex[:8]}" + try: + browser_cmd = _find_agent_browser() + except FileNotFoundError as e: + return {"success": False, "error": str(e)} + + cmd_prefix = ["npx", "agent-browser"] if browser_cmd == "npx agent-browser" else [browser_cmd] + base_args = cmd_prefix + ["--engine", "chrome", "--session", tmp_session, "--json"] + + task_socket_dir = os.path.join(_socket_safe_tmpdir(), f"agent-browser-{tmp_session}") + os.makedirs(task_socket_dir, mode=0o700, exist_ok=True) + browser_env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir} + browser_env["PATH"] = _merge_browser_path(browser_env.get("PATH", "")) + + def _run_tmp(cmd: str, cmd_args: List[str]) -> Dict[str, Any]: + full = base_args + [cmd] + cmd_args + # Use temp-file stdout/stderr pattern (same as _run_browser_command) + # to avoid pipe hang from agent-browser daemon inheriting fds. + stdout_path = os.path.join(task_socket_dir, f"_stdout_{cmd}") + stderr_path = os.path.join(task_socket_dir, f"_stderr_{cmd}") + stdout_fd = os.open(stdout_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) + stderr_fd = os.open(stderr_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) + try: + proc = subprocess.Popen( + full, stdout=stdout_fd, stderr=stderr_fd, + stdin=subprocess.DEVNULL, env=browser_env, + ) + finally: + os.close(stdout_fd) + os.close(stderr_fd) + try: + proc.wait(timeout=timeout) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait() + return {"success": False, "error": f"Chrome fallback '{cmd}' timed out"} + try: + with open(stdout_path, "r") as f: + stdout = f.read().strip() + if stdout: + return json.loads(stdout.split("\n")[-1]) + except Exception as exc: + logger.debug("Chrome fallback tmp cmd '%s' error: %s", cmd, exc) + finally: + for p in (stdout_path, stderr_path): + try: + os.unlink(p) + except OSError: + pass + return {"success": False, "error": f"Chrome fallback '{cmd}' failed"} + + try: + # 3. Navigate Chrome to the same URL. + nav = _run_tmp("open", [current_url]) + if not nav.get("success"): + logger.warning("Chrome fallback: navigate failed: %s", nav.get("error")) + return {"success": False, "error": f"Chrome fallback navigate failed: {nav.get('error')}"} + + # 4. Take the screenshot. + result = _run_tmp("screenshot", args) + return result + + finally: + # 5. Tear down the temporary Chrome session. + try: + _run_tmp("close", []) + except Exception: + pass + # Clean up socket directory + import shutil as _shutil + _shutil.rmtree(task_socket_dir, ignore_errors=True) + + def _auto_local_for_private_urls() -> bool: """Return whether a cloud-configured install should auto-spawn a local Chromium for LAN/localhost URLs. @@ -1371,6 +1588,7 @@ def _run_browser_command( command: str, args: List[str] = None, timeout: Optional[int] = None, + _engine_override: Optional[str] = None, ) -> Dict[str, Any]: """ Run an agent-browser CLI command using our pre-created Browserbase session. @@ -1381,6 +1599,9 @@ def _run_browser_command( args: Additional arguments for the command timeout: Command timeout in seconds. ``None`` reads ``browser.command_timeout`` from config (default 30s). + _engine_override: Force a specific engine for this call only. Used + internally by the Lightpanda fallback to retry with + Chrome without touching global state. Returns: Parsed JSON response from agent-browser @@ -1403,7 +1624,8 @@ def _run_browser_command( # Local mode with no Chromium on disk: fail fast with an actionable # message instead of hanging for _command_timeout seconds per call. - if _is_local_mode() and not _chromium_installed(): + # Skip when engine=lightpanda — LP doesn't need Chromium for navigation. + if _is_local_mode() and not _chromium_installed() and _get_browser_engine() != "lightpanda": if _running_in_docker(): hint = ( "Chromium browser is missing. You're running in Docker — pull " @@ -1443,6 +1665,11 @@ def _run_browser_command( # Local mode — launch a headless Chromium instance backend_args = ["--session", session_info["session_name"]] + # Lightpanda engine injection (local mode only, agent-browser v0.25.3+) + engine = _engine_override or _get_browser_engine() + if _should_inject_engine(engine): + backend_args += ["--engine", engine] + # Keep concrete executable paths intact, even when they contain spaces. # Only the synthetic npx fallback needs to expand into multiple argv items. cmd_prefix = ["npx", "agent-browser"] if browser_cmd == "npx agent-browser" else [browser_cmd] @@ -1539,87 +1766,105 @@ def _run_browser_command( proc.wait() logger.warning("browser '%s' timed out after %ds (task=%s, socket_dir=%s)", command, timeout, task_id, task_socket_dir) - return {"success": False, "error": f"Command timed out after {timeout} seconds"} + result = {"success": False, "error": f"Command timed out after {timeout} seconds"} + # Fall through to fallback check below + else: + with open(stdout_path, "r") as f: + stdout = f.read() + with open(stderr_path, "r") as f: + stderr = f.read() + returncode = proc.returncode - with open(stdout_path, "r") as f: - stdout = f.read() - with open(stderr_path, "r") as f: - stderr = f.read() - returncode = proc.returncode + # Clean up temp files (best-effort) + for p in (stdout_path, stderr_path): + try: + os.unlink(p) + except OSError: + pass - # Clean up temp files (best-effort) - for p in (stdout_path, stderr_path): - try: - os.unlink(p) - except OSError: - pass + # Log stderr for diagnostics — use warning level on failure so it's visible + if stderr and stderr.strip(): + level = logging.WARNING if returncode != 0 else logging.DEBUG + logger.log(level, "browser '%s' stderr: %s", command, stderr.strip()[:500]) + + stdout_text = stdout.strip() - # Log stderr for diagnostics — use warning level on failure so it's visible - if stderr and stderr.strip(): - level = logging.WARNING if returncode != 0 else logging.DEBUG - logger.log(level, "browser '%s' stderr: %s", command, stderr.strip()[:500]) - - stdout_text = stdout.strip() + # Empty output with rc=0 is a broken state — treat as failure rather + # than silently returning {"success": True, "data": {}}. + # Some commands (close, record) legitimately return no output. + if not stdout_text and returncode == 0 and command not in _EMPTY_OK_COMMANDS: + logger.warning("browser '%s' returned empty output (rc=0)", command) + result = {"success": False, "error": f"Browser command '{command}' returned no output"} + elif stdout_text: + try: + parsed = json.loads(stdout_text) + # Warn if snapshot came back empty (common sign of daemon/CDP issues) + if command == "snapshot" and parsed.get("success"): + snap_data = parsed.get("data", {}) + if not snap_data.get("snapshot") and not snap_data.get("refs"): + logger.warning("snapshot returned empty content. " + "Possible stale daemon or CDP connection issue. " + "returncode=%s", returncode) + result = parsed + except json.JSONDecodeError: + raw = stdout_text[:2000] + logger.warning("browser '%s' returned non-JSON output (rc=%s): %s", + command, returncode, raw[:500]) - # Empty output with rc=0 is a broken state — treat as failure rather - # than silently returning {"success": True, "data": {}}. - # Some commands (close, record) legitimately return no output. - if not stdout_text and returncode == 0 and command not in _EMPTY_OK_COMMANDS: - logger.warning("browser '%s' returned empty output (rc=0)", command) - return {"success": False, "error": f"Browser command '{command}' returned no output"} - - if stdout_text: - try: - parsed = json.loads(stdout_text) - # Warn if snapshot came back empty (common sign of daemon/CDP issues) - if command == "snapshot" and parsed.get("success"): - snap_data = parsed.get("data", {}) - if not snap_data.get("snapshot") and not snap_data.get("refs"): - logger.warning("snapshot returned empty content. " - "Possible stale daemon or CDP connection issue. " - "returncode=%s", returncode) - return parsed - except json.JSONDecodeError: - raw = stdout_text[:2000] - logger.warning("browser '%s' returned non-JSON output (rc=%s): %s", - command, returncode, raw[:500]) - - if command == "screenshot": - stderr_text = (stderr or "").strip() - combined_text = "\n".join( - part for part in [stdout_text, stderr_text] if part - ) - recovered_path = _extract_screenshot_path_from_text(combined_text) - - if recovered_path and Path(recovered_path).exists(): - logger.info( - "browser 'screenshot' recovered file from non-JSON output: %s", - recovered_path, + if command == "screenshot": + stderr_text = (stderr or "").strip() + combined_text = "\n".join( + part for part in [stdout_text, stderr_text] if part ) - return { - "success": True, - "data": { - "path": recovered_path, - "raw": raw, - }, - } + recovered_path = _extract_screenshot_path_from_text(combined_text) - return { - "success": False, - "error": f"Non-JSON output from agent-browser for '{command}': {raw}" - } - - # Check for errors - if returncode != 0: - error_msg = stderr.strip() if stderr else f"Command failed with code {returncode}" - logger.warning("browser '%s' failed (rc=%s): %s", command, returncode, error_msg[:300]) - return {"success": False, "error": error_msg} - - return {"success": True, "data": {}} + if recovered_path and Path(recovered_path).exists(): + logger.info( + "browser 'screenshot' recovered file from non-JSON output: %s", + recovered_path, + ) + result = { + "success": True, + "data": { + "path": recovered_path, + "raw": raw, + }, + } + else: + result = { + "success": False, + "error": f"Non-JSON output from agent-browser for '{command}': {raw}" + } + else: + result = { + "success": False, + "error": f"Non-JSON output from agent-browser for '{command}': {raw}" + } + elif returncode != 0: + # Check for errors + error_msg = stderr.strip() if stderr else f"Command failed with code {returncode}" + logger.warning("browser '%s' failed (rc=%s): %s", command, returncode, error_msg[:300]) + result = {"success": False, "error": error_msg} + else: + result = {"success": True, "data": {}} except Exception as e: logger.warning("browser '%s' exception: %s", command, e, exc_info=True) - return {"success": False, "error": str(e)} + result = {"success": False, "error": str(e)} + + # --- Lightpanda automatic Chrome fallback --- + # If engine is lightpanda and the result looks broken, retry with Chrome. + # This runs for ALL exit paths (timeout, empty, non-JSON, nonzero rc, parsed). + if _needs_lightpanda_fallback(engine, command, result): + logger.info("Lightpanda fallback: retrying '%s' with Chrome (task=%s)", command, task_id) + # For screenshots, use the dedicated Chrome fallback helper + # (spins up a separate Chrome session to the same URL). + if command == "screenshot": + return _chrome_fallback_screenshot(task_id, args or [], timeout) + # For other commands, re-run with engine forced to "auto" (Chrome). + return _run_browser_command(task_id, command, args, timeout, _engine_override="auto") + + return result def _extract_relevant_content( @@ -2399,6 +2644,49 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] import base64 import uuid as uuid_mod effective_task_id = _last_session_key(task_id or "default") + + # Lightpanda has no graphical renderer — pre-route screenshots to Chrome + # via the fallback helper instead of letting the normal path fail with a + # CDP error or return a placeholder PNG. + engine = _get_browser_engine() + _lp_prerouted = False + if engine == "lightpanda" and _should_inject_engine(engine): + logger.debug("browser_vision: pre-routing screenshot to Chrome (engine=lightpanda)") + screenshot_args = [] + if annotate: + screenshot_args.append("--annotate") + fb_result = _chrome_fallback_screenshot( + effective_task_id, screenshot_args, _get_command_timeout(), + ) + if fb_result.get("success"): + # Proceed with the Chrome screenshot for vision analysis + fb_path = fb_result.get("data", {}).get("path", "") + if fb_path and os.path.exists(fb_path): + try: + with open(fb_path, "rb") as f: + image_data = base64.b64encode(f.read()).decode("utf-8") + analysis = call_llm( + f"Analyze this browser screenshot and answer: {question}", + images=[{"data": image_data, "media_type": "image/png"}], + task="vision", + ) + from hermes_constants import get_hermes_dir + screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots") + screenshots_dir.mkdir(parents=True, exist_ok=True) + # Copy to persistent location + import shutil as _shutil_vision + persistent_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png" + _shutil_vision.copy2(fb_path, persistent_path) + return json.dumps({ + "analysis": analysis, + "screenshot_path": str(persistent_path), + }) + except Exception as e: + logger.warning("Lightpanda Chrome fallback vision failed: %s", e) + # Fall through to normal path as last resort + # Mark that we already tried the Chrome fallback, so the normal + # _run_browser_command path doesn't trigger it a second time. + _lp_prerouted = True # Save screenshot to persistent location so it can be shared with users from hermes_constants import get_hermes_dir @@ -2421,6 +2709,9 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] effective_task_id, "screenshot", screenshot_args, + # If the Lightpanda pre-route already failed, force Chrome so + # _run_browser_command doesn't trigger a redundant LP fallback. + _engine_override="auto" if _lp_prerouted else None, ) if not result.get("success"): @@ -2738,12 +3029,15 @@ def cleanup_all_browsers() -> None: global _cached_agent_browser, _agent_browser_resolved global _cached_command_timeout, _command_timeout_resolved global _cached_chromium_installed + global _cached_browser_engine, _browser_engine_resolved _cached_agent_browser = None _agent_browser_resolved = False _discover_homebrew_node_dirs.cache_clear() _cached_command_timeout = None _command_timeout_resolved = False _cached_chromium_installed = None + _cached_browser_engine = None + _browser_engine_resolved = False # ============================================================================ # Requirements Check