mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-14 04:02:26 +00:00
feat(browser): add Lightpanda engine support with automatic Chrome fallback
Add Lightpanda as an optional browser engine for local mode.
Lightpanda is a headless browser built from scratch in Zig -- faster
navigation than Chrome with significantly less memory.
One config line to enable:
browser:
engine: lightpanda
New functions in browser_tool.py:
- _get_browser_engine() -- config/env reader with validation + caching
- _should_inject_engine() -- only inject in local non-cloud mode
- _needs_lightpanda_fallback() -- detect empty/failed LP results
- _chrome_fallback_screenshot() -- temporary Chrome session for screenshots
- Engine injection in _run_browser_command (--engine flag)
- browser_vision pre-routes screenshots to Chrome when engine=lightpanda
Config:
- browser.engine in DEFAULT_CONFIG (auto/lightpanda/chrome)
- AGENT_BROWSER_ENGINE in OPTIONAL_ENV_VARS
- /browser status shows engine info in local mode
Rebased from PR #7144 onto current main. All existing code preserved --
pure additions only (+520/-2).
25 new tests + 81 total browser tests pass (0 failures).
This commit is contained in:
parent
aa88dcc57b
commit
395dbcc873
5 changed files with 770 additions and 74 deletions
|
|
@ -244,6 +244,15 @@ BROWSERBASE_PROXIES=true
|
||||||
# Uses custom Chromium build to avoid bot detection altogether
|
# Uses custom Chromium build to avoid bot detection altogether
|
||||||
BROWSERBASE_ADVANCED_STEALTH=false
|
BROWSERBASE_ADVANCED_STEALTH=false
|
||||||
|
|
||||||
|
# Browser engine for local mode (default: auto = Chrome)
|
||||||
|
# "auto" — use Chrome (don't pass --engine flag)
|
||||||
|
# "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots)
|
||||||
|
# "chrome" — explicitly request Chrome
|
||||||
|
# Requires agent-browser v0.25.3+. Lightpanda commands that fail or return
|
||||||
|
# empty results are automatically retried with Chrome.
|
||||||
|
# Also configurable via browser.engine in config.yaml.
|
||||||
|
# AGENT_BROWSER_ENGINE=auto
|
||||||
|
|
||||||
# Browser session timeout in seconds (default: 300)
|
# Browser session timeout in seconds (default: 300)
|
||||||
# Sessions are cleaned up after this duration of inactivity
|
# Sessions are cleaned up after this duration of inactivity
|
||||||
BROWSER_SESSION_TIMEOUT=300
|
BROWSER_SESSION_TIMEOUT=300
|
||||||
|
|
|
||||||
16
cli.py
16
cli.py
|
|
@ -298,6 +298,7 @@ def load_cli_config() -> Dict[str, Any]:
|
||||||
"browser": {
|
"browser": {
|
||||||
"inactivity_timeout": 120, # Auto-cleanup inactive browser sessions after 2 min
|
"inactivity_timeout": 120, # Auto-cleanup inactive browser sessions after 2 min
|
||||||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||||
|
"engine": "auto", # Browser engine: auto (Chrome), lightpanda, chrome
|
||||||
},
|
},
|
||||||
"compression": {
|
"compression": {
|
||||||
"enabled": True, # Auto-compress when approaching context limit
|
"enabled": True, # Auto-compress when approaching context limit
|
||||||
|
|
@ -7131,7 +7132,20 @@ class HermesCLI:
|
||||||
if provider is not None:
|
if provider is not None:
|
||||||
print(f"🌐 Browser: {provider.provider_name()} (cloud)")
|
print(f"🌐 Browser: {provider.provider_name()} (cloud)")
|
||||||
else:
|
else:
|
||||||
print("🌐 Browser: local headless Chromium (agent-browser)")
|
# Show engine info for local mode
|
||||||
|
try:
|
||||||
|
from tools.browser_tool import _get_browser_engine
|
||||||
|
engine = _get_browser_engine()
|
||||||
|
except Exception:
|
||||||
|
engine = "auto"
|
||||||
|
if engine == "lightpanda":
|
||||||
|
print("🌐 Browser: local Lightpanda (agent-browser --engine lightpanda)")
|
||||||
|
print(" ⚡ Lightpanda: faster navigation, no screenshot support")
|
||||||
|
print(" Automatic Chrome fallback for screenshots and failed commands")
|
||||||
|
elif engine == "chrome":
|
||||||
|
print("🌐 Browser: local headless Chrome (agent-browser --engine chrome)")
|
||||||
|
else:
|
||||||
|
print("🌐 Browser: local headless Chromium (agent-browser)")
|
||||||
print()
|
print()
|
||||||
print(" /browser connect — connect to your live Chrome")
|
print(" /browser connect — connect to your live Chrome")
|
||||||
print(" /browser disconnect — revert to default")
|
print(" /browser disconnect — revert to default")
|
||||||
|
|
|
||||||
|
|
@ -550,6 +550,13 @@ DEFAULT_CONFIG = {
|
||||||
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
|
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
|
||||||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||||
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
|
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
|
||||||
|
# Browser engine for local mode. Passed as ``--engine <value>`` to
|
||||||
|
# agent-browser v0.25.3+.
|
||||||
|
# "auto" — use Chrome (default, don't pass --engine at all)
|
||||||
|
# "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots)
|
||||||
|
# "chrome" — explicitly request Chrome
|
||||||
|
# Also settable via AGENT_BROWSER_ENGINE env var.
|
||||||
|
"engine": "auto",
|
||||||
"auto_local_for_private_urls": True, # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud
|
"auto_local_for_private_urls": True, # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud
|
||||||
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
|
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
|
||||||
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
|
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
|
||||||
|
|
@ -1827,6 +1834,15 @@ OPTIONAL_ENV_VARS = {
|
||||||
"password": False,
|
"password": False,
|
||||||
"category": "tool",
|
"category": "tool",
|
||||||
},
|
},
|
||||||
|
"AGENT_BROWSER_ENGINE": {
|
||||||
|
"description": "Browser engine for local mode: auto (default Chrome), lightpanda (faster, no screenshots), chrome",
|
||||||
|
"prompt": "Browser engine (auto/lightpanda/chrome)",
|
||||||
|
"url": "https://github.com/vercel-labs/agent-browser",
|
||||||
|
"tools": ["browser_navigate", "browser_snapshot", "browser_click", "browser_vision"],
|
||||||
|
"password": False,
|
||||||
|
"category": "tool",
|
||||||
|
"advanced": True,
|
||||||
|
},
|
||||||
"CAMOFOX_URL": {
|
"CAMOFOX_URL": {
|
||||||
"description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)",
|
"description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)",
|
||||||
"prompt": "Camofox server URL",
|
"prompt": "Camofox server URL",
|
||||||
|
|
|
||||||
363
tests/tools/test_browser_lightpanda.py
Normal file
363
tests/tools/test_browser_lightpanda.py
Normal file
|
|
@ -0,0 +1,363 @@
|
||||||
|
"""Tests for Lightpanda engine support in browser_tool.py."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _reset_engine_cache():
|
||||||
|
"""Reset the module-level engine cache so tests start clean."""
|
||||||
|
import tools.browser_tool as bt
|
||||||
|
bt._cached_browser_engine = None
|
||||||
|
bt._browser_engine_resolved = False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _clean_engine_cache():
|
||||||
|
"""Reset engine cache before and after each test."""
|
||||||
|
_reset_engine_cache()
|
||||||
|
yield
|
||||||
|
_reset_engine_cache()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _get_browser_engine
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestGetBrowserEngine:
|
||||||
|
"""Test engine resolution from config and env vars."""
|
||||||
|
|
||||||
|
def test_default_is_auto(self):
|
||||||
|
"""With no config or env var, engine defaults to 'auto'."""
|
||||||
|
from tools.browser_tool import _get_browser_engine
|
||||||
|
with patch.dict(os.environ, {}, clear=False):
|
||||||
|
os.environ.pop("AGENT_BROWSER_ENGINE", None)
|
||||||
|
with patch("hermes_cli.config.read_raw_config", return_value={}):
|
||||||
|
assert _get_browser_engine() == "auto"
|
||||||
|
|
||||||
|
def test_config_lightpanda(self):
|
||||||
|
"""Config browser.engine = 'lightpanda' is respected."""
|
||||||
|
from tools.browser_tool import _get_browser_engine
|
||||||
|
cfg = {"browser": {"engine": "lightpanda"}}
|
||||||
|
with patch("hermes_cli.config.read_raw_config", return_value=cfg):
|
||||||
|
assert _get_browser_engine() == "lightpanda"
|
||||||
|
|
||||||
|
def test_config_chrome(self):
|
||||||
|
"""Config browser.engine = 'chrome' is respected."""
|
||||||
|
from tools.browser_tool import _get_browser_engine
|
||||||
|
cfg = {"browser": {"engine": "chrome"}}
|
||||||
|
with patch("hermes_cli.config.read_raw_config", return_value=cfg):
|
||||||
|
assert _get_browser_engine() == "chrome"
|
||||||
|
|
||||||
|
def test_env_var_fallback(self):
|
||||||
|
"""AGENT_BROWSER_ENGINE env var is used when config has no engine key."""
|
||||||
|
from tools.browser_tool import _get_browser_engine
|
||||||
|
with patch.dict(os.environ, {"AGENT_BROWSER_ENGINE": "lightpanda"}):
|
||||||
|
with patch("hermes_cli.config.read_raw_config", return_value={}):
|
||||||
|
assert _get_browser_engine() == "lightpanda"
|
||||||
|
|
||||||
|
def test_config_takes_priority_over_env(self):
|
||||||
|
"""Config value wins over env var."""
|
||||||
|
from tools.browser_tool import _get_browser_engine
|
||||||
|
cfg = {"browser": {"engine": "chrome"}}
|
||||||
|
with patch.dict(os.environ, {"AGENT_BROWSER_ENGINE": "lightpanda"}):
|
||||||
|
with patch("hermes_cli.config.read_raw_config", return_value=cfg):
|
||||||
|
assert _get_browser_engine() == "chrome"
|
||||||
|
|
||||||
|
def test_value_is_lowercased(self):
|
||||||
|
"""Engine value is normalized to lowercase."""
|
||||||
|
from tools.browser_tool import _get_browser_engine
|
||||||
|
cfg = {"browser": {"engine": "Lightpanda"}}
|
||||||
|
with patch("hermes_cli.config.read_raw_config", return_value=cfg):
|
||||||
|
assert _get_browser_engine() == "lightpanda"
|
||||||
|
|
||||||
|
def test_invalid_engine_falls_back_to_auto(self):
|
||||||
|
"""Unknown engine values are rejected and fall back to 'auto'."""
|
||||||
|
from tools.browser_tool import _get_browser_engine
|
||||||
|
cfg = {"browser": {"engine": "firefox"}}
|
||||||
|
with patch("hermes_cli.config.read_raw_config", return_value=cfg):
|
||||||
|
assert _get_browser_engine() == "auto"
|
||||||
|
|
||||||
|
def test_caching(self):
|
||||||
|
"""Result is cached — second call doesn't re-read config."""
|
||||||
|
from tools.browser_tool import _get_browser_engine
|
||||||
|
mock_read = MagicMock(return_value={"browser": {"engine": "lightpanda"}})
|
||||||
|
with patch("hermes_cli.config.read_raw_config", mock_read):
|
||||||
|
assert _get_browser_engine() == "lightpanda"
|
||||||
|
assert _get_browser_engine() == "lightpanda"
|
||||||
|
mock_read.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _should_inject_engine
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestShouldInjectEngine:
|
||||||
|
"""Test whether --engine flag is injected based on mode."""
|
||||||
|
|
||||||
|
def test_auto_never_injects(self):
|
||||||
|
from tools.browser_tool import _should_inject_engine
|
||||||
|
assert _should_inject_engine("auto") is False
|
||||||
|
|
||||||
|
def test_lightpanda_injects_in_local_mode(self):
|
||||||
|
from tools.browser_tool import _should_inject_engine
|
||||||
|
with patch("tools.browser_tool._is_camofox_mode", return_value=False), \
|
||||||
|
patch("tools.browser_tool._get_cdp_override", return_value=""), \
|
||||||
|
patch("tools.browser_tool._get_cloud_provider", return_value=None):
|
||||||
|
assert _should_inject_engine("lightpanda") is True
|
||||||
|
|
||||||
|
def test_chrome_injects_in_local_mode(self):
|
||||||
|
from tools.browser_tool import _should_inject_engine
|
||||||
|
with patch("tools.browser_tool._is_camofox_mode", return_value=False), \
|
||||||
|
patch("tools.browser_tool._get_cdp_override", return_value=""), \
|
||||||
|
patch("tools.browser_tool._get_cloud_provider", return_value=None):
|
||||||
|
assert _should_inject_engine("chrome") is True
|
||||||
|
|
||||||
|
def test_no_inject_in_camofox_mode(self):
|
||||||
|
from tools.browser_tool import _should_inject_engine
|
||||||
|
with patch("tools.browser_tool._is_camofox_mode", return_value=True):
|
||||||
|
assert _should_inject_engine("lightpanda") is False
|
||||||
|
|
||||||
|
def test_no_inject_with_cdp_override(self):
|
||||||
|
from tools.browser_tool import _should_inject_engine
|
||||||
|
with patch("tools.browser_tool._is_camofox_mode", return_value=False), \
|
||||||
|
patch("tools.browser_tool._get_cdp_override", return_value="ws://localhost:9222"):
|
||||||
|
assert _should_inject_engine("lightpanda") is False
|
||||||
|
|
||||||
|
def test_no_inject_with_cloud_provider(self):
|
||||||
|
from tools.browser_tool import _should_inject_engine
|
||||||
|
mock_provider = MagicMock()
|
||||||
|
with patch("tools.browser_tool._is_camofox_mode", return_value=False), \
|
||||||
|
patch("tools.browser_tool._get_cdp_override", return_value=""), \
|
||||||
|
patch("tools.browser_tool._get_cloud_provider", return_value=mock_provider):
|
||||||
|
assert _should_inject_engine("lightpanda") is False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _needs_lightpanda_fallback
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestNeedsLightpandaFallback:
|
||||||
|
"""Test fallback detection for Lightpanda results."""
|
||||||
|
|
||||||
|
def test_non_lightpanda_never_falls_back(self):
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
result = {"success": False, "error": "timeout"}
|
||||||
|
assert _needs_lightpanda_fallback("chrome", "open", result) is False
|
||||||
|
assert _needs_lightpanda_fallback("auto", "open", result) is False
|
||||||
|
|
||||||
|
def test_failed_command_triggers_fallback(self):
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
result = {"success": False, "error": "page.goto: Timeout"}
|
||||||
|
assert _needs_lightpanda_fallback("lightpanda", "open", result) is True
|
||||||
|
|
||||||
|
def test_empty_snapshot_triggers_fallback(self):
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
result = {"success": True, "data": {"snapshot": ""}}
|
||||||
|
assert _needs_lightpanda_fallback("lightpanda", "snapshot", result) is True
|
||||||
|
|
||||||
|
def test_short_snapshot_triggers_fallback(self):
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
result = {"success": True, "data": {"snapshot": "- none"}}
|
||||||
|
assert _needs_lightpanda_fallback("lightpanda", "snapshot", result) is True
|
||||||
|
|
||||||
|
def test_normal_snapshot_does_not_trigger(self):
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
result = {"success": True, "data": {
|
||||||
|
"snapshot": '- heading "Example Domain" [ref=e1]\n- link "Learn more" [ref=e2]'
|
||||||
|
}}
|
||||||
|
assert _needs_lightpanda_fallback("lightpanda", "snapshot", result) is False
|
||||||
|
|
||||||
|
def test_small_screenshot_triggers_fallback(self, tmp_path):
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
# Create a tiny file simulating the Lightpanda placeholder PNG
|
||||||
|
placeholder = tmp_path / "placeholder.png"
|
||||||
|
placeholder.write_bytes(b"\x89PNG" + b"\x00" * 2000) # ~2KB
|
||||||
|
result = {"success": True, "data": {"path": str(placeholder)}}
|
||||||
|
assert _needs_lightpanda_fallback("lightpanda", "screenshot", result) is True
|
||||||
|
|
||||||
|
def test_actual_placeholder_size_triggers_fallback(self, tmp_path):
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
# Lightpanda PR #1766 resized the placeholder to 1920x1080 (~17 KB)
|
||||||
|
placeholder = tmp_path / "placeholder_1920.png"
|
||||||
|
placeholder.write_bytes(b"\x89PNG" + b"\x00" * 16693) # actual measured: 16697 bytes
|
||||||
|
result = {"success": True, "data": {"path": str(placeholder)}}
|
||||||
|
assert _needs_lightpanda_fallback("lightpanda", "screenshot", result) is True
|
||||||
|
|
||||||
|
def test_normal_screenshot_does_not_trigger(self, tmp_path):
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
# Create a larger file simulating a real Chrome screenshot
|
||||||
|
real_screenshot = tmp_path / "real.png"
|
||||||
|
real_screenshot.write_bytes(b"\x89PNG" + b"\x00" * 50_000) # ~50KB
|
||||||
|
result = {"success": True, "data": {"path": str(real_screenshot)}}
|
||||||
|
assert _needs_lightpanda_fallback("lightpanda", "screenshot", result) is False
|
||||||
|
|
||||||
|
def test_successful_open_does_not_trigger(self):
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
result = {"success": True, "data": {"title": "Example", "url": "https://example.com"}}
|
||||||
|
assert _needs_lightpanda_fallback("lightpanda", "open", result) is False
|
||||||
|
|
||||||
|
def test_close_command_never_triggers_fallback(self):
|
||||||
|
"""Session-management commands like 'close' are not fallback-eligible."""
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
result = {"success": False, "error": "session closed"}
|
||||||
|
assert _needs_lightpanda_fallback("lightpanda", "close", result) is False
|
||||||
|
|
||||||
|
def test_record_command_never_triggers_fallback(self):
|
||||||
|
"""The 'record' command is tied to the engine daemon — not retryable."""
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
result = {"success": False, "error": "recording failed"}
|
||||||
|
assert _needs_lightpanda_fallback("lightpanda", "record", result) is False
|
||||||
|
|
||||||
|
def test_unknown_command_does_not_trigger_fallback(self):
|
||||||
|
"""Commands not in the whitelist should not trigger fallback."""
|
||||||
|
from tools.browser_tool import _needs_lightpanda_fallback
|
||||||
|
result = {"success": False, "error": "nope"}
|
||||||
|
assert _needs_lightpanda_fallback("lightpanda", "some_future_cmd", result) is False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Config integration
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestConfigIntegration:
|
||||||
|
"""Verify engine config is in DEFAULT_CONFIG."""
|
||||||
|
|
||||||
|
def test_engine_in_default_config(self):
|
||||||
|
from hermes_cli.config import DEFAULT_CONFIG
|
||||||
|
assert "engine" in DEFAULT_CONFIG["browser"]
|
||||||
|
assert DEFAULT_CONFIG["browser"]["engine"] == "auto"
|
||||||
|
|
||||||
|
def test_env_var_registered(self):
|
||||||
|
from hermes_cli.config import OPTIONAL_ENV_VARS
|
||||||
|
assert "AGENT_BROWSER_ENGINE" in OPTIONAL_ENV_VARS
|
||||||
|
entry = OPTIONAL_ENV_VARS["AGENT_BROWSER_ENGINE"]
|
||||||
|
assert entry["category"] == "tool"
|
||||||
|
assert entry["advanced"] is True
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# cleanup_all_browsers resets engine cache
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestCleanupResetsEngineCache:
|
||||||
|
"""Verify cleanup_all_browsers resets engine-related globals."""
|
||||||
|
|
||||||
|
def test_engine_cache_reset(self):
|
||||||
|
import tools.browser_tool as bt
|
||||||
|
# Seed the cache
|
||||||
|
bt._cached_browser_engine = "lightpanda"
|
||||||
|
bt._browser_engine_resolved = True
|
||||||
|
# cleanup should reset them
|
||||||
|
bt.cleanup_all_browsers()
|
||||||
|
assert bt._cached_browser_engine is None
|
||||||
|
assert bt._browser_engine_resolved is False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _engine_override parameter
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestEngineOverride:
|
||||||
|
"""Verify _engine_override bypasses the cached engine."""
|
||||||
|
|
||||||
|
@patch("tools.browser_tool._get_session_info")
|
||||||
|
@patch("tools.browser_tool._find_agent_browser", return_value="/usr/bin/agent-browser")
|
||||||
|
@patch("tools.browser_tool._is_local_mode", return_value=True)
|
||||||
|
@patch("tools.browser_tool._chromium_installed", return_value=True)
|
||||||
|
@patch("tools.browser_tool._get_cloud_provider", return_value=None)
|
||||||
|
@patch("tools.browser_tool._get_cdp_override", return_value="")
|
||||||
|
@patch("tools.browser_tool._is_camofox_mode", return_value=False)
|
||||||
|
def test_override_prevents_engine_injection(
|
||||||
|
self, _camofox, _cdp, _cloud, _chromium, _local, _find, _session
|
||||||
|
):
|
||||||
|
"""When _engine_override='auto', --engine flag is NOT injected."""
|
||||||
|
import tools.browser_tool as bt
|
||||||
|
|
||||||
|
# Set the global cache to lightpanda
|
||||||
|
bt._cached_browser_engine = "lightpanda"
|
||||||
|
bt._browser_engine_resolved = True
|
||||||
|
|
||||||
|
_session.return_value = {"session_name": "test-sess"}
|
||||||
|
|
||||||
|
# Track the cmd_parts that Popen receives
|
||||||
|
captured_cmds = []
|
||||||
|
mock_proc = MagicMock()
|
||||||
|
mock_proc.wait.return_value = None
|
||||||
|
mock_proc.returncode = 0
|
||||||
|
|
||||||
|
def capture_popen(cmd, **kwargs):
|
||||||
|
captured_cmds.append(cmd)
|
||||||
|
return mock_proc
|
||||||
|
|
||||||
|
# We need to mock the file operations too
|
||||||
|
with patch("subprocess.Popen", side_effect=capture_popen), \
|
||||||
|
patch("os.open", return_value=99), \
|
||||||
|
patch("os.close"), \
|
||||||
|
patch("os.unlink"), \
|
||||||
|
patch("os.makedirs"), \
|
||||||
|
patch("builtins.open", MagicMock(return_value=MagicMock(
|
||||||
|
__enter__=MagicMock(return_value=MagicMock(read=MagicMock(return_value='{"success": true, "data": {}}'))),
|
||||||
|
__exit__=MagicMock(return_value=False),
|
||||||
|
))), \
|
||||||
|
patch("tools.interrupt.is_interrupted", return_value=False), \
|
||||||
|
patch("tools.browser_tool._write_owner_pid"):
|
||||||
|
bt._run_browser_command("task1", "snapshot", [], _engine_override="auto")
|
||||||
|
|
||||||
|
# Should NOT contain "--engine" since override is "auto"
|
||||||
|
assert len(captured_cmds) == 1
|
||||||
|
assert "--engine" not in captured_cmds[0]
|
||||||
|
|
||||||
|
@patch("tools.browser_tool._get_session_info")
|
||||||
|
@patch("tools.browser_tool._find_agent_browser", return_value="/usr/bin/agent-browser")
|
||||||
|
@patch("tools.browser_tool._is_local_mode", return_value=True)
|
||||||
|
@patch("tools.browser_tool._chromium_installed", return_value=True)
|
||||||
|
@patch("tools.browser_tool._get_cloud_provider", return_value=None)
|
||||||
|
@patch("tools.browser_tool._get_cdp_override", return_value="")
|
||||||
|
@patch("tools.browser_tool._is_camofox_mode", return_value=False)
|
||||||
|
def test_no_override_uses_cached_engine(
|
||||||
|
self, _camofox, _cdp, _cloud, _chromium, _local, _find, _session
|
||||||
|
):
|
||||||
|
"""Without _engine_override, the cached engine is used."""
|
||||||
|
import tools.browser_tool as bt
|
||||||
|
|
||||||
|
bt._cached_browser_engine = "lightpanda"
|
||||||
|
bt._browser_engine_resolved = True
|
||||||
|
|
||||||
|
_session.return_value = {"session_name": "test-sess"}
|
||||||
|
|
||||||
|
captured_cmds = []
|
||||||
|
mock_proc = MagicMock()
|
||||||
|
mock_proc.wait.return_value = None
|
||||||
|
mock_proc.returncode = 0
|
||||||
|
|
||||||
|
def capture_popen(cmd, **kwargs):
|
||||||
|
captured_cmds.append(cmd)
|
||||||
|
return mock_proc
|
||||||
|
|
||||||
|
# Return a substantive snapshot so the LP fallback does NOT trigger.
|
||||||
|
mock_stdout = '{"success": true, "data": {"snapshot": "- heading \\"Hello\\" [ref=e1]", "refs": {"e1": {}}}}'
|
||||||
|
with patch("subprocess.Popen", side_effect=capture_popen), \
|
||||||
|
patch("os.open", return_value=99), \
|
||||||
|
patch("os.close"), \
|
||||||
|
patch("os.unlink"), \
|
||||||
|
patch("os.makedirs"), \
|
||||||
|
patch("builtins.open", MagicMock(return_value=MagicMock(
|
||||||
|
__enter__=MagicMock(return_value=MagicMock(read=MagicMock(return_value=mock_stdout))),
|
||||||
|
__exit__=MagicMock(return_value=False),
|
||||||
|
))), \
|
||||||
|
patch("tools.interrupt.is_interrupted", return_value=False), \
|
||||||
|
patch("tools.browser_tool._write_owner_pid"):
|
||||||
|
bt._run_browser_command("task1", "snapshot", [])
|
||||||
|
|
||||||
|
# SHOULD contain "--engine lightpanda"
|
||||||
|
assert len(captured_cmds) == 1
|
||||||
|
assert "--engine" in captured_cmds[0]
|
||||||
|
engine_idx = captured_cmds[0].index("--engine")
|
||||||
|
assert captured_cmds[0][engine_idx + 1] == "lightpanda"
|
||||||
|
|
@ -400,6 +400,11 @@ _cached_allow_private_urls: Optional[bool] = None
|
||||||
_cached_agent_browser: Optional[str] = None
|
_cached_agent_browser: Optional[str] = None
|
||||||
_agent_browser_resolved = False
|
_agent_browser_resolved = False
|
||||||
|
|
||||||
|
# Lightpanda engine support — cached like _get_cloud_provider().
|
||||||
|
# agent-browser v0.25.3+ supports ``--engine lightpanda`` natively.
|
||||||
|
_cached_browser_engine: Optional[str] = None
|
||||||
|
_browser_engine_resolved = False
|
||||||
|
|
||||||
|
|
||||||
def _get_cloud_provider() -> Optional[CloudBrowserProvider]:
|
def _get_cloud_provider() -> Optional[CloudBrowserProvider]:
|
||||||
"""Return the configured cloud browser provider, or None for local mode.
|
"""Return the configured cloud browser provider, or None for local mode.
|
||||||
|
|
@ -489,6 +494,218 @@ _auto_local_for_private_urls_resolved = False
|
||||||
_cached_auto_local_for_private_urls: bool = True
|
_cached_auto_local_for_private_urls: bool = True
|
||||||
|
|
||||||
|
|
||||||
|
def _get_browser_engine() -> str:
|
||||||
|
"""Return the configured browser engine (``auto``, ``lightpanda``, or ``chrome``).
|
||||||
|
|
||||||
|
Reads ``config["browser"]["engine"]`` once and caches the result.
|
||||||
|
Falls back to the ``AGENT_BROWSER_ENGINE`` env var, then ``auto``.
|
||||||
|
|
||||||
|
``auto`` means: don't pass ``--engine`` at all (agent-browser defaults to
|
||||||
|
Chrome). ``lightpanda`` or ``chrome`` are forwarded as
|
||||||
|
``--engine <value>`` to agent-browser v0.25.3+.
|
||||||
|
|
||||||
|
Lightpanda is 1.3-5.8x faster on navigation but has no graphical
|
||||||
|
renderer (no screenshots).
|
||||||
|
"""
|
||||||
|
global _cached_browser_engine, _browser_engine_resolved
|
||||||
|
if _browser_engine_resolved:
|
||||||
|
return _cached_browser_engine
|
||||||
|
|
||||||
|
_browser_engine_resolved = True
|
||||||
|
_cached_browser_engine = "auto" # safe default
|
||||||
|
|
||||||
|
# Config file takes priority
|
||||||
|
try:
|
||||||
|
from hermes_cli.config import read_raw_config
|
||||||
|
cfg = read_raw_config()
|
||||||
|
val = cfg.get("browser", {}).get("engine")
|
||||||
|
if val and str(val).strip():
|
||||||
|
_cached_browser_engine = str(val).strip().lower()
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("Could not read browser.engine from config: %s", e)
|
||||||
|
|
||||||
|
# Fall back to env var (only if config didn't set a value)
|
||||||
|
if _cached_browser_engine == "auto":
|
||||||
|
env_val = os.environ.get("AGENT_BROWSER_ENGINE", "").strip().lower()
|
||||||
|
if env_val:
|
||||||
|
_cached_browser_engine = env_val
|
||||||
|
|
||||||
|
# Validate: agent-browser only accepts "chrome" and "lightpanda".
|
||||||
|
_VALID_ENGINES = {"auto", "lightpanda", "chrome"}
|
||||||
|
if _cached_browser_engine not in _VALID_ENGINES:
|
||||||
|
logger.warning(
|
||||||
|
"Unknown browser engine %r (valid: %s), falling back to 'auto'",
|
||||||
|
_cached_browser_engine, ", ".join(sorted(_VALID_ENGINES)),
|
||||||
|
)
|
||||||
|
_cached_browser_engine = "auto"
|
||||||
|
|
||||||
|
return _cached_browser_engine
|
||||||
|
|
||||||
|
|
||||||
|
def _should_inject_engine(engine: str) -> bool:
|
||||||
|
"""Return True when the engine flag should be added to agent-browser commands.
|
||||||
|
|
||||||
|
Only inject ``--engine`` for non-cloud, non-camofox local sessions where
|
||||||
|
the engine is explicitly set (not ``auto``).
|
||||||
|
"""
|
||||||
|
if engine == "auto":
|
||||||
|
return False
|
||||||
|
if _is_camofox_mode():
|
||||||
|
return False
|
||||||
|
return _is_local_mode()
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]) -> bool:
|
||||||
|
"""Check if a Lightpanda result should trigger an automatic Chrome fallback.
|
||||||
|
|
||||||
|
Returns True when:
|
||||||
|
- The engine is lightpanda AND
|
||||||
|
- The command is fallback-eligible (not close/record) AND
|
||||||
|
- The command failed, OR
|
||||||
|
- A snapshot came back empty/suspiciously short, OR
|
||||||
|
- A screenshot returned but is likely the Lightpanda placeholder PNG
|
||||||
|
"""
|
||||||
|
if engine != "lightpanda":
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Only retry commands where Chrome can meaningfully produce a different
|
||||||
|
# result. Session-management commands (close, record) are tied to the
|
||||||
|
# engine's daemon and can't be retried on a different engine.
|
||||||
|
_FALLBACK_ELIGIBLE = {"open", "snapshot", "screenshot", "eval", "click",
|
||||||
|
"fill", "scroll", "back", "press", "console", "errors"}
|
||||||
|
if command not in _FALLBACK_ELIGIBLE:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Explicit failure
|
||||||
|
if not result.get("success"):
|
||||||
|
return True
|
||||||
|
|
||||||
|
data = result.get("data", {})
|
||||||
|
|
||||||
|
if command == "snapshot":
|
||||||
|
snap = data.get("snapshot", "")
|
||||||
|
# Empty or near-empty snapshots indicate Lightpanda couldn't render
|
||||||
|
if not snap or len(snap.strip()) < 20:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if command == "screenshot":
|
||||||
|
# Lightpanda returns a placeholder PNG with its panda logo.
|
||||||
|
# Since LP PR #1766 resized it to 1920x1080, the placeholder is
|
||||||
|
# ~17 KB. Real Chromium screenshots are typically 100 KB+.
|
||||||
|
path = data.get("path", "")
|
||||||
|
if path:
|
||||||
|
try:
|
||||||
|
size = os.path.getsize(path)
|
||||||
|
if size < 20480:
|
||||||
|
logger.debug("Lightpanda screenshot is suspiciously small (%d bytes), "
|
||||||
|
"triggering Chrome fallback", size)
|
||||||
|
return True
|
||||||
|
except OSError:
|
||||||
|
return True # file doesn't exist or can't be read
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _chrome_fallback_screenshot(
|
||||||
|
task_id: str,
|
||||||
|
args: List[str],
|
||||||
|
timeout: int,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Take a screenshot using a temporary Chrome session.
|
||||||
|
|
||||||
|
When the active session uses Lightpanda, ``--engine chrome`` on the same
|
||||||
|
session has no effect — the engine is locked at daemon startup. This
|
||||||
|
helper spins up a **separate** Chrome session, navigates to the same URL
|
||||||
|
the agent is currently viewing, takes the screenshot, then tears down the
|
||||||
|
temporary session.
|
||||||
|
|
||||||
|
Returns the screenshot result dict (same shape as ``_run_browser_command``).
|
||||||
|
"""
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
# 1. Grab the current URL from the Lightpanda session.
|
||||||
|
url_result = _run_browser_command(task_id, "eval", ["window.location.href"], timeout=10)
|
||||||
|
current_url = None
|
||||||
|
if url_result.get("success"):
|
||||||
|
current_url = url_result.get("data", {}).get("result", "").strip().strip('"').strip("'")
|
||||||
|
if not current_url:
|
||||||
|
logger.warning("Chrome fallback: could not determine current URL from LP session")
|
||||||
|
return {"success": False, "error": "Chrome fallback failed: could not determine current URL"}
|
||||||
|
|
||||||
|
# 2. Create a temporary Chrome session (bypasses _get_session_info's cache).
|
||||||
|
tmp_session = f"h_cfb_{uuid.uuid4().hex[:8]}"
|
||||||
|
try:
|
||||||
|
browser_cmd = _find_agent_browser()
|
||||||
|
except FileNotFoundError as e:
|
||||||
|
return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
cmd_prefix = ["npx", "agent-browser"] if browser_cmd == "npx agent-browser" else [browser_cmd]
|
||||||
|
base_args = cmd_prefix + ["--engine", "chrome", "--session", tmp_session, "--json"]
|
||||||
|
|
||||||
|
task_socket_dir = os.path.join(_socket_safe_tmpdir(), f"agent-browser-{tmp_session}")
|
||||||
|
os.makedirs(task_socket_dir, mode=0o700, exist_ok=True)
|
||||||
|
browser_env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir}
|
||||||
|
browser_env["PATH"] = _merge_browser_path(browser_env.get("PATH", ""))
|
||||||
|
|
||||||
|
def _run_tmp(cmd: str, cmd_args: List[str]) -> Dict[str, Any]:
|
||||||
|
full = base_args + [cmd] + cmd_args
|
||||||
|
# Use temp-file stdout/stderr pattern (same as _run_browser_command)
|
||||||
|
# to avoid pipe hang from agent-browser daemon inheriting fds.
|
||||||
|
stdout_path = os.path.join(task_socket_dir, f"_stdout_{cmd}")
|
||||||
|
stderr_path = os.path.join(task_socket_dir, f"_stderr_{cmd}")
|
||||||
|
stdout_fd = os.open(stdout_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
||||||
|
stderr_fd = os.open(stderr_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
||||||
|
try:
|
||||||
|
proc = subprocess.Popen(
|
||||||
|
full, stdout=stdout_fd, stderr=stderr_fd,
|
||||||
|
stdin=subprocess.DEVNULL, env=browser_env,
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
os.close(stdout_fd)
|
||||||
|
os.close(stderr_fd)
|
||||||
|
try:
|
||||||
|
proc.wait(timeout=timeout)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
proc.kill()
|
||||||
|
proc.wait()
|
||||||
|
return {"success": False, "error": f"Chrome fallback '{cmd}' timed out"}
|
||||||
|
try:
|
||||||
|
with open(stdout_path, "r") as f:
|
||||||
|
stdout = f.read().strip()
|
||||||
|
if stdout:
|
||||||
|
return json.loads(stdout.split("\n")[-1])
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("Chrome fallback tmp cmd '%s' error: %s", cmd, exc)
|
||||||
|
finally:
|
||||||
|
for p in (stdout_path, stderr_path):
|
||||||
|
try:
|
||||||
|
os.unlink(p)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
return {"success": False, "error": f"Chrome fallback '{cmd}' failed"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 3. Navigate Chrome to the same URL.
|
||||||
|
nav = _run_tmp("open", [current_url])
|
||||||
|
if not nav.get("success"):
|
||||||
|
logger.warning("Chrome fallback: navigate failed: %s", nav.get("error"))
|
||||||
|
return {"success": False, "error": f"Chrome fallback navigate failed: {nav.get('error')}"}
|
||||||
|
|
||||||
|
# 4. Take the screenshot.
|
||||||
|
result = _run_tmp("screenshot", args)
|
||||||
|
return result
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# 5. Tear down the temporary Chrome session.
|
||||||
|
try:
|
||||||
|
_run_tmp("close", [])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
# Clean up socket directory
|
||||||
|
import shutil as _shutil
|
||||||
|
_shutil.rmtree(task_socket_dir, ignore_errors=True)
|
||||||
|
|
||||||
|
|
||||||
def _auto_local_for_private_urls() -> bool:
|
def _auto_local_for_private_urls() -> bool:
|
||||||
"""Return whether a cloud-configured install should auto-spawn a local
|
"""Return whether a cloud-configured install should auto-spawn a local
|
||||||
Chromium for LAN/localhost URLs.
|
Chromium for LAN/localhost URLs.
|
||||||
|
|
@ -1371,6 +1588,7 @@ def _run_browser_command(
|
||||||
command: str,
|
command: str,
|
||||||
args: List[str] = None,
|
args: List[str] = None,
|
||||||
timeout: Optional[int] = None,
|
timeout: Optional[int] = None,
|
||||||
|
_engine_override: Optional[str] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Run an agent-browser CLI command using our pre-created Browserbase session.
|
Run an agent-browser CLI command using our pre-created Browserbase session.
|
||||||
|
|
@ -1381,6 +1599,9 @@ def _run_browser_command(
|
||||||
args: Additional arguments for the command
|
args: Additional arguments for the command
|
||||||
timeout: Command timeout in seconds. ``None`` reads
|
timeout: Command timeout in seconds. ``None`` reads
|
||||||
``browser.command_timeout`` from config (default 30s).
|
``browser.command_timeout`` from config (default 30s).
|
||||||
|
_engine_override: Force a specific engine for this call only. Used
|
||||||
|
internally by the Lightpanda fallback to retry with
|
||||||
|
Chrome without touching global state.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Parsed JSON response from agent-browser
|
Parsed JSON response from agent-browser
|
||||||
|
|
@ -1403,7 +1624,8 @@ def _run_browser_command(
|
||||||
|
|
||||||
# Local mode with no Chromium on disk: fail fast with an actionable
|
# Local mode with no Chromium on disk: fail fast with an actionable
|
||||||
# message instead of hanging for _command_timeout seconds per call.
|
# message instead of hanging for _command_timeout seconds per call.
|
||||||
if _is_local_mode() and not _chromium_installed():
|
# Skip when engine=lightpanda — LP doesn't need Chromium for navigation.
|
||||||
|
if _is_local_mode() and not _chromium_installed() and _get_browser_engine() != "lightpanda":
|
||||||
if _running_in_docker():
|
if _running_in_docker():
|
||||||
hint = (
|
hint = (
|
||||||
"Chromium browser is missing. You're running in Docker — pull "
|
"Chromium browser is missing. You're running in Docker — pull "
|
||||||
|
|
@ -1443,6 +1665,11 @@ def _run_browser_command(
|
||||||
# Local mode — launch a headless Chromium instance
|
# Local mode — launch a headless Chromium instance
|
||||||
backend_args = ["--session", session_info["session_name"]]
|
backend_args = ["--session", session_info["session_name"]]
|
||||||
|
|
||||||
|
# Lightpanda engine injection (local mode only, agent-browser v0.25.3+)
|
||||||
|
engine = _engine_override or _get_browser_engine()
|
||||||
|
if _should_inject_engine(engine):
|
||||||
|
backend_args += ["--engine", engine]
|
||||||
|
|
||||||
# Keep concrete executable paths intact, even when they contain spaces.
|
# Keep concrete executable paths intact, even when they contain spaces.
|
||||||
# Only the synthetic npx fallback needs to expand into multiple argv items.
|
# Only the synthetic npx fallback needs to expand into multiple argv items.
|
||||||
cmd_prefix = ["npx", "agent-browser"] if browser_cmd == "npx agent-browser" else [browser_cmd]
|
cmd_prefix = ["npx", "agent-browser"] if browser_cmd == "npx agent-browser" else [browser_cmd]
|
||||||
|
|
@ -1539,87 +1766,105 @@ def _run_browser_command(
|
||||||
proc.wait()
|
proc.wait()
|
||||||
logger.warning("browser '%s' timed out after %ds (task=%s, socket_dir=%s)",
|
logger.warning("browser '%s' timed out after %ds (task=%s, socket_dir=%s)",
|
||||||
command, timeout, task_id, task_socket_dir)
|
command, timeout, task_id, task_socket_dir)
|
||||||
return {"success": False, "error": f"Command timed out after {timeout} seconds"}
|
result = {"success": False, "error": f"Command timed out after {timeout} seconds"}
|
||||||
|
# Fall through to fallback check below
|
||||||
|
else:
|
||||||
|
with open(stdout_path, "r") as f:
|
||||||
|
stdout = f.read()
|
||||||
|
with open(stderr_path, "r") as f:
|
||||||
|
stderr = f.read()
|
||||||
|
returncode = proc.returncode
|
||||||
|
|
||||||
with open(stdout_path, "r") as f:
|
# Clean up temp files (best-effort)
|
||||||
stdout = f.read()
|
for p in (stdout_path, stderr_path):
|
||||||
with open(stderr_path, "r") as f:
|
try:
|
||||||
stderr = f.read()
|
os.unlink(p)
|
||||||
returncode = proc.returncode
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
# Clean up temp files (best-effort)
|
# Log stderr for diagnostics — use warning level on failure so it's visible
|
||||||
for p in (stdout_path, stderr_path):
|
if stderr and stderr.strip():
|
||||||
try:
|
level = logging.WARNING if returncode != 0 else logging.DEBUG
|
||||||
os.unlink(p)
|
logger.log(level, "browser '%s' stderr: %s", command, stderr.strip()[:500])
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Log stderr for diagnostics — use warning level on failure so it's visible
|
stdout_text = stdout.strip()
|
||||||
if stderr and stderr.strip():
|
|
||||||
level = logging.WARNING if returncode != 0 else logging.DEBUG
|
|
||||||
logger.log(level, "browser '%s' stderr: %s", command, stderr.strip()[:500])
|
|
||||||
|
|
||||||
stdout_text = stdout.strip()
|
# Empty output with rc=0 is a broken state — treat as failure rather
|
||||||
|
# than silently returning {"success": True, "data": {}}.
|
||||||
|
# Some commands (close, record) legitimately return no output.
|
||||||
|
if not stdout_text and returncode == 0 and command not in _EMPTY_OK_COMMANDS:
|
||||||
|
logger.warning("browser '%s' returned empty output (rc=0)", command)
|
||||||
|
result = {"success": False, "error": f"Browser command '{command}' returned no output"}
|
||||||
|
elif stdout_text:
|
||||||
|
try:
|
||||||
|
parsed = json.loads(stdout_text)
|
||||||
|
# Warn if snapshot came back empty (common sign of daemon/CDP issues)
|
||||||
|
if command == "snapshot" and parsed.get("success"):
|
||||||
|
snap_data = parsed.get("data", {})
|
||||||
|
if not snap_data.get("snapshot") and not snap_data.get("refs"):
|
||||||
|
logger.warning("snapshot returned empty content. "
|
||||||
|
"Possible stale daemon or CDP connection issue. "
|
||||||
|
"returncode=%s", returncode)
|
||||||
|
result = parsed
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
raw = stdout_text[:2000]
|
||||||
|
logger.warning("browser '%s' returned non-JSON output (rc=%s): %s",
|
||||||
|
command, returncode, raw[:500])
|
||||||
|
|
||||||
# Empty output with rc=0 is a broken state — treat as failure rather
|
if command == "screenshot":
|
||||||
# than silently returning {"success": True, "data": {}}.
|
stderr_text = (stderr or "").strip()
|
||||||
# Some commands (close, record) legitimately return no output.
|
combined_text = "\n".join(
|
||||||
if not stdout_text and returncode == 0 and command not in _EMPTY_OK_COMMANDS:
|
part for part in [stdout_text, stderr_text] if part
|
||||||
logger.warning("browser '%s' returned empty output (rc=0)", command)
|
|
||||||
return {"success": False, "error": f"Browser command '{command}' returned no output"}
|
|
||||||
|
|
||||||
if stdout_text:
|
|
||||||
try:
|
|
||||||
parsed = json.loads(stdout_text)
|
|
||||||
# Warn if snapshot came back empty (common sign of daemon/CDP issues)
|
|
||||||
if command == "snapshot" and parsed.get("success"):
|
|
||||||
snap_data = parsed.get("data", {})
|
|
||||||
if not snap_data.get("snapshot") and not snap_data.get("refs"):
|
|
||||||
logger.warning("snapshot returned empty content. "
|
|
||||||
"Possible stale daemon or CDP connection issue. "
|
|
||||||
"returncode=%s", returncode)
|
|
||||||
return parsed
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
raw = stdout_text[:2000]
|
|
||||||
logger.warning("browser '%s' returned non-JSON output (rc=%s): %s",
|
|
||||||
command, returncode, raw[:500])
|
|
||||||
|
|
||||||
if command == "screenshot":
|
|
||||||
stderr_text = (stderr or "").strip()
|
|
||||||
combined_text = "\n".join(
|
|
||||||
part for part in [stdout_text, stderr_text] if part
|
|
||||||
)
|
|
||||||
recovered_path = _extract_screenshot_path_from_text(combined_text)
|
|
||||||
|
|
||||||
if recovered_path and Path(recovered_path).exists():
|
|
||||||
logger.info(
|
|
||||||
"browser 'screenshot' recovered file from non-JSON output: %s",
|
|
||||||
recovered_path,
|
|
||||||
)
|
)
|
||||||
return {
|
recovered_path = _extract_screenshot_path_from_text(combined_text)
|
||||||
"success": True,
|
|
||||||
"data": {
|
if recovered_path and Path(recovered_path).exists():
|
||||||
"path": recovered_path,
|
logger.info(
|
||||||
"raw": raw,
|
"browser 'screenshot' recovered file from non-JSON output: %s",
|
||||||
},
|
recovered_path,
|
||||||
|
)
|
||||||
|
result = {
|
||||||
|
"success": True,
|
||||||
|
"data": {
|
||||||
|
"path": recovered_path,
|
||||||
|
"raw": raw,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
result = {
|
||||||
|
"success": False,
|
||||||
|
"error": f"Non-JSON output from agent-browser for '{command}': {raw}"
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
result = {
|
||||||
|
"success": False,
|
||||||
|
"error": f"Non-JSON output from agent-browser for '{command}': {raw}"
|
||||||
}
|
}
|
||||||
|
elif returncode != 0:
|
||||||
return {
|
# Check for errors
|
||||||
"success": False,
|
error_msg = stderr.strip() if stderr else f"Command failed with code {returncode}"
|
||||||
"error": f"Non-JSON output from agent-browser for '{command}': {raw}"
|
logger.warning("browser '%s' failed (rc=%s): %s", command, returncode, error_msg[:300])
|
||||||
}
|
result = {"success": False, "error": error_msg}
|
||||||
|
else:
|
||||||
# Check for errors
|
result = {"success": True, "data": {}}
|
||||||
if returncode != 0:
|
|
||||||
error_msg = stderr.strip() if stderr else f"Command failed with code {returncode}"
|
|
||||||
logger.warning("browser '%s' failed (rc=%s): %s", command, returncode, error_msg[:300])
|
|
||||||
return {"success": False, "error": error_msg}
|
|
||||||
|
|
||||||
return {"success": True, "data": {}}
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("browser '%s' exception: %s", command, e, exc_info=True)
|
logger.warning("browser '%s' exception: %s", command, e, exc_info=True)
|
||||||
return {"success": False, "error": str(e)}
|
result = {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
# --- Lightpanda automatic Chrome fallback ---
|
||||||
|
# If engine is lightpanda and the result looks broken, retry with Chrome.
|
||||||
|
# This runs for ALL exit paths (timeout, empty, non-JSON, nonzero rc, parsed).
|
||||||
|
if _needs_lightpanda_fallback(engine, command, result):
|
||||||
|
logger.info("Lightpanda fallback: retrying '%s' with Chrome (task=%s)", command, task_id)
|
||||||
|
# For screenshots, use the dedicated Chrome fallback helper
|
||||||
|
# (spins up a separate Chrome session to the same URL).
|
||||||
|
if command == "screenshot":
|
||||||
|
return _chrome_fallback_screenshot(task_id, args or [], timeout)
|
||||||
|
# For other commands, re-run with engine forced to "auto" (Chrome).
|
||||||
|
return _run_browser_command(task_id, command, args, timeout, _engine_override="auto")
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _extract_relevant_content(
|
def _extract_relevant_content(
|
||||||
|
|
@ -2400,6 +2645,49 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||||
import uuid as uuid_mod
|
import uuid as uuid_mod
|
||||||
effective_task_id = _last_session_key(task_id or "default")
|
effective_task_id = _last_session_key(task_id or "default")
|
||||||
|
|
||||||
|
# Lightpanda has no graphical renderer — pre-route screenshots to Chrome
|
||||||
|
# via the fallback helper instead of letting the normal path fail with a
|
||||||
|
# CDP error or return a placeholder PNG.
|
||||||
|
engine = _get_browser_engine()
|
||||||
|
_lp_prerouted = False
|
||||||
|
if engine == "lightpanda" and _should_inject_engine(engine):
|
||||||
|
logger.debug("browser_vision: pre-routing screenshot to Chrome (engine=lightpanda)")
|
||||||
|
screenshot_args = []
|
||||||
|
if annotate:
|
||||||
|
screenshot_args.append("--annotate")
|
||||||
|
fb_result = _chrome_fallback_screenshot(
|
||||||
|
effective_task_id, screenshot_args, _get_command_timeout(),
|
||||||
|
)
|
||||||
|
if fb_result.get("success"):
|
||||||
|
# Proceed with the Chrome screenshot for vision analysis
|
||||||
|
fb_path = fb_result.get("data", {}).get("path", "")
|
||||||
|
if fb_path and os.path.exists(fb_path):
|
||||||
|
try:
|
||||||
|
with open(fb_path, "rb") as f:
|
||||||
|
image_data = base64.b64encode(f.read()).decode("utf-8")
|
||||||
|
analysis = call_llm(
|
||||||
|
f"Analyze this browser screenshot and answer: {question}",
|
||||||
|
images=[{"data": image_data, "media_type": "image/png"}],
|
||||||
|
task="vision",
|
||||||
|
)
|
||||||
|
from hermes_constants import get_hermes_dir
|
||||||
|
screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
|
||||||
|
screenshots_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
# Copy to persistent location
|
||||||
|
import shutil as _shutil_vision
|
||||||
|
persistent_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
|
||||||
|
_shutil_vision.copy2(fb_path, persistent_path)
|
||||||
|
return json.dumps({
|
||||||
|
"analysis": analysis,
|
||||||
|
"screenshot_path": str(persistent_path),
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Lightpanda Chrome fallback vision failed: %s", e)
|
||||||
|
# Fall through to normal path as last resort
|
||||||
|
# Mark that we already tried the Chrome fallback, so the normal
|
||||||
|
# _run_browser_command path doesn't trigger it a second time.
|
||||||
|
_lp_prerouted = True
|
||||||
|
|
||||||
# Save screenshot to persistent location so it can be shared with users
|
# Save screenshot to persistent location so it can be shared with users
|
||||||
from hermes_constants import get_hermes_dir
|
from hermes_constants import get_hermes_dir
|
||||||
screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
|
screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
|
||||||
|
|
@ -2421,6 +2709,9 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||||
effective_task_id,
|
effective_task_id,
|
||||||
"screenshot",
|
"screenshot",
|
||||||
screenshot_args,
|
screenshot_args,
|
||||||
|
# If the Lightpanda pre-route already failed, force Chrome so
|
||||||
|
# _run_browser_command doesn't trigger a redundant LP fallback.
|
||||||
|
_engine_override="auto" if _lp_prerouted else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not result.get("success"):
|
if not result.get("success"):
|
||||||
|
|
@ -2738,12 +3029,15 @@ def cleanup_all_browsers() -> None:
|
||||||
global _cached_agent_browser, _agent_browser_resolved
|
global _cached_agent_browser, _agent_browser_resolved
|
||||||
global _cached_command_timeout, _command_timeout_resolved
|
global _cached_command_timeout, _command_timeout_resolved
|
||||||
global _cached_chromium_installed
|
global _cached_chromium_installed
|
||||||
|
global _cached_browser_engine, _browser_engine_resolved
|
||||||
_cached_agent_browser = None
|
_cached_agent_browser = None
|
||||||
_agent_browser_resolved = False
|
_agent_browser_resolved = False
|
||||||
_discover_homebrew_node_dirs.cache_clear()
|
_discover_homebrew_node_dirs.cache_clear()
|
||||||
_cached_command_timeout = None
|
_cached_command_timeout = None
|
||||||
_command_timeout_resolved = False
|
_command_timeout_resolved = False
|
||||||
_cached_chromium_installed = None
|
_cached_chromium_installed = None
|
||||||
|
_cached_browser_engine = None
|
||||||
|
_browser_engine_resolved = False
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Requirements Check
|
# Requirements Check
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue