mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-15 04:12:25 +00:00
fix(browser): surface Lightpanda Chrome fallback warnings
This commit is contained in:
parent
395dbcc873
commit
3ebdd26449
3 changed files with 504 additions and 208 deletions
|
|
@ -157,6 +157,14 @@ class TestNeedsLightpandaFallback:
|
|||
result = {"success": False, "error": "page.goto: Timeout"}
|
||||
assert _needs_lightpanda_fallback("lightpanda", "open", result) is True
|
||||
|
||||
def test_failed_command_reason_is_user_visible(self):
    """A failed Lightpanda command yields a reason quoting the error and the Chrome retry."""
    from tools.browser_tool import _lightpanda_fallback_reason

    failed_open = {"success": False, "error": "page.goto: Timeout"}
    message = _lightpanda_fallback_reason("lightpanda", "open", failed_open)

    # A reason must be produced, and it has to carry both the original
    # error text and the note that Chrome was used for the retry.
    assert message is not None
    for fragment in ("page.goto: Timeout", "retried with Chrome"):
        assert fragment in message
|
||||
|
||||
def test_empty_snapshot_triggers_fallback(self):
|
||||
from tools.browser_tool import _needs_lightpanda_fallback
|
||||
result = {"success": True, "data": {"snapshot": ""}}
|
||||
|
|
@ -260,6 +268,145 @@ class TestCleanupResetsEngineCache:
|
|||
assert bt._browser_engine_resolved is False
|
||||
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# fallback warning annotation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestLightpandaFallbackWarning:
    """Verify Chrome fallback results are annotated for users."""

    def test_fallback_result_gets_user_visible_warning(self):
        """Annotation adds warning/engine metadata at top level and inside ``data``."""
        from tools.browser_tool import _annotate_lightpanda_fallback

        result = {"success": True, "data": {"snapshot": "- heading \"Hello\" [ref=e1]"}}
        annotated = _annotate_lightpanda_fallback(
            result,
            "Lightpanda returned an empty/too-short snapshot; retried with Chrome.",
        )

        assert annotated["browser_engine"] == "chrome"
        assert "Lightpanda fallback" in annotated["fallback_warning"]
        assert annotated["browser_engine_fallback"] == {
            "from": "lightpanda",
            "to": "chrome",
            "reason": "Lightpanda returned an empty/too-short snapshot; retried with Chrome.",
        }
        assert annotated["data"]["fallback_warning"] == annotated["fallback_warning"]
        assert annotated["data"]["browser_engine"] == "chrome"

    def test_browser_navigate_surfaces_fallback_warning(self):
        """An annotated first command result shows up in the navigate response."""
        import json
        import tools.browser_tool as bt

        result = bt._annotate_lightpanda_fallback(
            {"success": True, "data": {"title": "Fallback OK", "url": "https://example.com/"}},
            "synthetic Lightpanda failure; retried with Chrome.",
        )

        # Cleanup lives in ``finally`` so a failing assertion cannot leak the
        # session key into later tests.
        try:
            with patch("tools.browser_tool._is_local_backend", return_value=True), \
                 patch("tools.browser_tool._get_cloud_provider", return_value=None), \
                 patch("tools.browser_tool._get_session_info", return_value={
                     "session_name": "test", "_first_nav": False, "features": {"local": True, "proxies": True}
                 }), \
                 patch("tools.browser_tool._run_browser_command", side_effect=[
                     # First mocked command result carries the annotation,
                     # the second is a plain snapshot result.
                     result,
                     {"success": True, "data": {"snapshot": "- heading \"Fallback OK\" [ref=e1]", "refs": {"e1": {}}}},
                 ]):
                response = json.loads(bt.browser_navigate("https://example.com", task_id="warn-test"))

            assert response["success"] is True
            assert response["browser_engine"] == "chrome"
            assert "Lightpanda fallback" in response["fallback_warning"]
            assert response["browser_engine_fallback"]["from"] == "lightpanda"
            assert response["browser_engine_fallback"]["to"] == "chrome"
        finally:
            bt._last_active_session_key.pop("warn-test", None)

    def test_browser_navigate_surfaces_auto_snapshot_fallback_warning(self):
        """An annotated second (snapshot) result also surfaces in the navigate response."""
        import json
        import tools.browser_tool as bt

        snapshot_result = bt._annotate_lightpanda_fallback(
            {"success": True, "data": {"snapshot": "- heading \"Fallback OK\" [ref=e1]", "refs": {"e1": {}}}},
            "Lightpanda returned an empty/too-short snapshot; retried with Chrome.",
        )

        # Cleanup lives in ``finally`` so a failing assertion cannot leak the
        # session key into later tests.
        try:
            with patch("tools.browser_tool._is_local_backend", return_value=True), \
                 patch("tools.browser_tool._get_cloud_provider", return_value=None), \
                 patch("tools.browser_tool._get_session_info", return_value={
                     "session_name": "test", "_first_nav": False, "features": {"local": True, "proxies": True}
                 }), \
                 patch("tools.browser_tool._run_browser_command", side_effect=[
                     # Here the first result is plain; only the second one
                     # carries the fallback annotation.
                     {"success": True, "data": {"title": "Fallback OK", "url": "https://example.com/"}},
                     snapshot_result,
                 ]):
                response = json.loads(bt.browser_navigate("https://example.com", task_id="warn-test2"))

            assert response["success"] is True
            assert response["browser_engine"] == "chrome"
            assert "Lightpanda fallback" in response["fallback_warning"]
            assert response["element_count"] == 1
        finally:
            bt._last_active_session_key.pop("warn-test2", None)

    def test_failed_fallback_warning_is_preserved_on_click_error(self):
        """A failed, annotated click result keeps its fallback metadata in the error response."""
        import json
        import tools.browser_tool as bt

        result = bt._annotate_lightpanda_fallback(
            {"success": False, "error": "Chrome fallback failed"},
            "Lightpanda 'click' failed (timeout); retried with Chrome.",
        )
        bt._last_active_session_key["warn-test3"] = "warn-test3"
        # The key is registered by this test itself, so always remove it,
        # even when an assertion below fails.
        try:
            with patch("tools.browser_tool._run_browser_command", return_value=result):
                response = json.loads(bt.browser_click("@e1", task_id="warn-test3"))

            assert response["success"] is False
            assert "Lightpanda fallback" in response["fallback_warning"]
            assert response["browser_engine"] == "chrome"
        finally:
            bt._last_active_session_key.pop("warn-test3", None)

    def test_browser_vision_lightpanda_uses_chrome_capture_and_normal_call_llm_shape(self, tmp_path):
        """Vision under Lightpanda uses the Chrome screenshot and calls the LLM with ``messages``."""
        import json
        import tools.browser_tool as bt

        chrome_shot = tmp_path / "chrome.png"
        chrome_shot.write_bytes(b"\x89PNG" + b"0" * 128)

        # Minimal stand-ins exposing ``choices[0].message.content``.
        class _Msg:
            content = "Example Domain screenshot"

        class _Choice:
            message = _Msg()

        class _Response:
            choices = [_Choice()]

        captured_kwargs = {}

        def fake_call_llm(**kwargs):
            # Record the keyword arguments so the call shape can be asserted.
            captured_kwargs.update(kwargs)
            return _Response()

        with patch("tools.browser_tool._get_browser_engine", return_value="lightpanda"), \
             patch("tools.browser_tool._should_inject_engine", return_value=True), \
             patch("tools.browser_tool._chrome_fallback_screenshot", return_value={
                 "success": True, "data": {"path": str(chrome_shot)}
             }), \
             patch("hermes_constants.get_hermes_dir", return_value=tmp_path), \
             patch("tools.browser_tool.call_llm", side_effect=fake_call_llm):
            response = json.loads(bt.browser_vision("what is this?", task_id="vision-test"))

        assert response["success"] is True
        assert response["analysis"] == "Example Domain screenshot"
        assert response["browser_engine"] == "chrome"
        assert "Lightpanda fallback" in response["fallback_warning"]
        assert "messages" in captured_kwargs
        assert "images" not in captured_kwargs
        assert captured_kwargs["task"] == "vision"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _engine_override parameter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -361,3 +508,48 @@ class TestEngineOverride:
|
|||
assert "--engine" in captured_cmds[0]
|
||||
engine_idx = captured_cmds[0].index("--engine")
|
||||
assert captured_cmds[0][engine_idx + 1] == "lightpanda"
|
||||
|
||||
def test_hybrid_local_sidecar_injects_engine_even_with_cloud_provider(self):
    """A task::local sidecar is local even when global cloud config exists."""
    import tools.browser_tool as bt

    # Pin the cached engine so the command under test resolves to lightpanda.
    bt._cached_browser_engine = "lightpanda"
    bt._browser_engine_resolved = True

    spawned_commands = []
    cloud_provider = MagicMock()

    fake_process = MagicMock()
    fake_process.wait.return_value = None
    fake_process.returncode = 0

    def record_popen(cmd, **kwargs):
        # Remember every argv passed to Popen; nothing is actually spawned.
        spawned_commands.append(cmd)
        return fake_process

    stdout_payload = json.dumps({
        "success": True,
        "data": {"snapshot": '- heading "Hello" [ref=e1]', "refs": {"e1": {}}},
    })

    # Fake file object: used as a context manager, its ``read()`` returns
    # the canned JSON payload above.
    fake_reader = MagicMock(read=MagicMock(return_value=stdout_payload))
    fake_handle = MagicMock(
        __enter__=MagicMock(return_value=fake_reader),
        __exit__=MagicMock(return_value=False),
    )

    with patch("tools.browser_tool._get_session_info", return_value={"session_name": "local-sidecar"}), \
         patch("tools.browser_tool._find_agent_browser", return_value="/usr/bin/agent-browser"), \
         patch("tools.browser_tool._is_local_mode", return_value=False), \
         patch("tools.browser_tool._chromium_installed", return_value=True), \
         patch("tools.browser_tool._get_cloud_provider", return_value=cloud_provider), \
         patch("tools.browser_tool._get_cdp_override", return_value=""), \
         patch("tools.browser_tool._is_camofox_mode", return_value=False), \
         patch("subprocess.Popen", side_effect=record_popen), \
         patch("os.open", return_value=99), \
         patch("os.close"), \
         patch("os.unlink"), \
         patch("os.makedirs"), \
         patch("builtins.open", MagicMock(return_value=fake_handle)), \
         patch("tools.interrupt.is_interrupted", return_value=False), \
         patch("tools.browser_tool._write_owner_pid"):
        bt._run_browser_command("task::local", "snapshot", [])

    # Exactly one process launch, carrying ``--engine lightpanda``.
    assert len(spawned_commands) == 1
    launched = spawned_commands[0]
    assert "--engine" in launched
    assert launched[launched.index("--engine") + 1] == "lightpanda"
|
||||
|
|
|
|||
|
|
@ -555,18 +555,15 @@ def _should_inject_engine(engine: str) -> bool:
|
|||
return _is_local_mode()
|
||||
|
||||
|
||||
def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]) -> bool:
|
||||
"""Check if a Lightpanda result should trigger an automatic Chrome fallback.
|
||||
def _lightpanda_fallback_reason(engine: str, command: str, result: Dict[str, Any]) -> Optional[str]:
|
||||
"""Return the user-visible reason a Lightpanda result needs Chrome fallback.
|
||||
|
||||
Returns True when:
|
||||
- The engine is lightpanda AND
|
||||
- The command is fallback-eligible (not close/record) AND
|
||||
- The command failed, OR
|
||||
- A snapshot came back empty/suspiciously short, OR
|
||||
- A screenshot returned but is likely the Lightpanda placeholder PNG
|
||||
``None`` means no fallback should run. The returned string is copied into
|
||||
the fallback result so CLI/TUI/gateway users can see when Hermes silently
|
||||
switched from Lightpanda to Chrome for completeness.
|
||||
"""
|
||||
if engine != "lightpanda":
|
||||
return False
|
||||
return None
|
||||
|
||||
# Only retry commands where Chrome can meaningfully produce a different
|
||||
# result. Session-management commands (close, record) are tied to the
|
||||
|
|
@ -574,11 +571,12 @@ def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]
|
|||
_FALLBACK_ELIGIBLE = {"open", "snapshot", "screenshot", "eval", "click",
|
||||
"fill", "scroll", "back", "press", "console", "errors"}
|
||||
if command not in _FALLBACK_ELIGIBLE:
|
||||
return False
|
||||
return None
|
||||
|
||||
# Explicit failure
|
||||
if not result.get("success"):
|
||||
return True
|
||||
error = str(result.get("error") or "command failed").strip()
|
||||
return f"Lightpanda {command!r} failed ({error}); retried with Chrome."
|
||||
|
||||
data = result.get("data", {})
|
||||
|
||||
|
|
@ -586,7 +584,7 @@ def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]
|
|||
snap = data.get("snapshot", "")
|
||||
# Empty or near-empty snapshots indicate Lightpanda couldn't render
|
||||
if not snap or len(snap.strip()) < 20:
|
||||
return True
|
||||
return "Lightpanda returned an empty/too-short snapshot; retried with Chrome."
|
||||
|
||||
if command == "screenshot":
|
||||
# Lightpanda returns a placeholder PNG with its panda logo.
|
||||
|
|
@ -599,32 +597,79 @@ def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]
|
|||
if size < 20480:
|
||||
logger.debug("Lightpanda screenshot is suspiciously small (%d bytes), "
|
||||
"triggering Chrome fallback", size)
|
||||
return True
|
||||
return (
|
||||
f"Lightpanda screenshot was suspiciously small ({size} bytes); "
|
||||
"retried with Chrome."
|
||||
)
|
||||
except OSError:
|
||||
return True # file doesn't exist or can't be read
|
||||
return "Lightpanda screenshot file was missing/unreadable; retried with Chrome."
|
||||
|
||||
return False
|
||||
return None
|
||||
|
||||
|
||||
def _chrome_fallback_screenshot(
|
||||
def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]) -> bool:
    """Report whether a Lightpanda result warrants an automatic Chrome retry.

    True exactly when ``_lightpanda_fallback_reason`` yields a reason (anything
    other than ``None``) for the same arguments.
    """
    reason = _lightpanda_fallback_reason(engine, command, result)
    return reason is not None
|
||||
|
||||
|
||||
def _annotate_lightpanda_fallback(result: Dict[str, Any], reason: str) -> Dict[str, Any]:
|
||||
"""Add a user-visible Chrome fallback warning to a browser command result."""
|
||||
warning = (
|
||||
"⚠ Lightpanda fallback: Chrome was used for this browser action. "
|
||||
f"{reason}"
|
||||
)
|
||||
annotated = dict(result)
|
||||
annotated["fallback_warning"] = warning
|
||||
annotated["browser_engine"] = "chrome"
|
||||
annotated["browser_engine_fallback"] = {
|
||||
"from": "lightpanda",
|
||||
"to": "chrome",
|
||||
"reason": reason,
|
||||
}
|
||||
data = annotated.get("data")
|
||||
if isinstance(data, dict):
|
||||
data = dict(data)
|
||||
data.setdefault("fallback_warning", warning)
|
||||
data.setdefault("browser_engine", "chrome")
|
||||
data.setdefault(
|
||||
"browser_engine_fallback",
|
||||
{"from": "lightpanda", "to": "chrome", "reason": reason},
|
||||
)
|
||||
annotated["data"] = data
|
||||
return annotated
|
||||
|
||||
|
||||
def _copy_fallback_warning(target: Dict[str, Any], result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Copy browser fallback metadata from an internal result into a tool response."""
|
||||
if result.get("fallback_warning"):
|
||||
target["fallback_warning"] = result["fallback_warning"]
|
||||
target["browser_engine"] = result.get("browser_engine")
|
||||
target["browser_engine_fallback"] = result.get("browser_engine_fallback")
|
||||
return target
|
||||
|
||||
|
||||
def _run_chrome_fallback_command(
|
||||
task_id: str,
|
||||
command: str,
|
||||
args: List[str],
|
||||
timeout: int,
|
||||
) -> Dict[str, Any]:
|
||||
"""Take a screenshot using a temporary Chrome session.
|
||||
"""Run a browser command in a temporary Chrome session at the current URL.
|
||||
|
||||
When the active session uses Lightpanda, ``--engine chrome`` on the same
|
||||
session has no effect — the engine is locked at daemon startup. This
|
||||
helper spins up a **separate** Chrome session, navigates to the same URL
|
||||
the agent is currently viewing, takes the screenshot, then tears down the
|
||||
temporary session.
|
||||
|
||||
Returns the screenshot result dict (same shape as ``_run_browser_command``).
|
||||
agent-browser locks the engine when a named daemon starts. Passing
|
||||
``--engine chrome`` to the same Lightpanda ``--session`` cannot change that
|
||||
running daemon. This helper always uses a fresh temporary Chrome session,
|
||||
navigates it to the current Lightpanda URL, runs ``command``, then tears it
|
||||
down.
|
||||
"""
|
||||
import uuid
|
||||
|
||||
# 1. Grab the current URL from the Lightpanda session.
|
||||
url_result = _run_browser_command(task_id, "eval", ["window.location.href"], timeout=10)
|
||||
# 1. Grab the current URL from the Lightpanda session. Use
|
||||
# ``_engine_override=\"auto\"`` so this helper does not recursively trigger
|
||||
# Lightpanda→Chrome fallback if the eval call itself fails.
|
||||
url_result = _run_browser_command(
|
||||
task_id, "eval", ["window.location.href"], timeout=10, _engine_override="auto"
|
||||
)
|
||||
current_url = None
|
||||
if url_result.get("success"):
|
||||
current_url = url_result.get("data", {}).get("result", "").strip().strip('"').strip("'")
|
||||
|
|
@ -647,6 +692,9 @@ def _chrome_fallback_screenshot(
|
|||
browser_env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir}
|
||||
browser_env["PATH"] = _merge_browser_path(browser_env.get("PATH", ""))
|
||||
|
||||
if "AGENT_BROWSER_IDLE_TIMEOUT_MS" not in browser_env:
|
||||
browser_env["AGENT_BROWSER_IDLE_TIMEOUT_MS"] = str(BROWSER_SESSION_INACTIVITY_TIMEOUT * 1000)
|
||||
|
||||
def _run_tmp(cmd: str, cmd_args: List[str]) -> Dict[str, Any]:
|
||||
full = base_args + [cmd] + cmd_args
|
||||
# Use temp-file stdout/stderr pattern (same as _run_browser_command)
|
||||
|
|
@ -677,9 +725,9 @@ def _chrome_fallback_screenshot(
|
|||
except Exception as exc:
|
||||
logger.debug("Chrome fallback tmp cmd '%s' error: %s", cmd, exc)
|
||||
finally:
|
||||
for p in (stdout_path, stderr_path):
|
||||
for pth in (stdout_path, stderr_path):
|
||||
try:
|
||||
os.unlink(p)
|
||||
os.unlink(pth)
|
||||
except OSError:
|
||||
pass
|
||||
return {"success": False, "error": f"Chrome fallback '{cmd}' failed"}
|
||||
|
|
@ -691,9 +739,8 @@ def _chrome_fallback_screenshot(
|
|||
logger.warning("Chrome fallback: navigate failed: %s", nav.get("error"))
|
||||
return {"success": False, "error": f"Chrome fallback navigate failed: {nav.get('error')}"}
|
||||
|
||||
# 4. Take the screenshot.
|
||||
result = _run_tmp("screenshot", args)
|
||||
return result
|
||||
# 4. Run the requested command in Chrome.
|
||||
return _run_tmp(command, args)
|
||||
|
||||
finally:
|
||||
# 5. Tear down the temporary Chrome session.
|
||||
|
|
@ -706,6 +753,15 @@ def _chrome_fallback_screenshot(
|
|||
_shutil.rmtree(task_socket_dir, ignore_errors=True)
|
||||
|
||||
|
||||
def _chrome_fallback_screenshot(task_id: str, args: List[str], timeout: int) -> Dict[str, Any]:
    """Capture a screenshot through a temporary Chrome session.

    Delegates to ``_run_chrome_fallback_command`` with the command fixed to
    ``"screenshot"`` and returns its result unchanged.
    """
    screenshot_result = _run_chrome_fallback_command(task_id, "screenshot", args, timeout)
    return screenshot_result
|
||||
|
||||
|
||||
def _auto_local_for_private_urls() -> bool:
|
||||
"""Return whether a cloud-configured install should auto-spawn a local
|
||||
Chromium for LAN/localhost URLs.
|
||||
|
|
@ -1665,9 +1721,12 @@ def _run_browser_command(
|
|||
# Local mode — launch a headless Chromium instance
|
||||
backend_args = ["--session", session_info["session_name"]]
|
||||
|
||||
# Lightpanda engine injection (local mode only, agent-browser v0.25.3+)
|
||||
# Lightpanda engine injection (local mode only, agent-browser v0.25.3+).
|
||||
# Use the resolved session backend rather than global cloud-provider state:
|
||||
# hybrid private-URL routing can create a local sidecar while a cloud
|
||||
# provider remains configured for public URLs.
|
||||
engine = _engine_override or _get_browser_engine()
|
||||
if _should_inject_engine(engine):
|
||||
if engine != "auto" and not _is_camofox_mode() and not session_info.get("cdp_url"):
|
||||
backend_args += ["--engine", engine]
|
||||
|
||||
# Keep concrete executable paths intact, even when they contain spaces.
|
||||
|
|
@ -1855,14 +1914,21 @@ def _run_browser_command(
|
|||
# --- Lightpanda automatic Chrome fallback ---
|
||||
# If engine is lightpanda and the result looks broken, retry with Chrome.
|
||||
# This runs for ALL exit paths (timeout, empty, non-JSON, nonzero rc, parsed).
|
||||
if _needs_lightpanda_fallback(engine, command, result):
|
||||
logger.info("Lightpanda fallback: retrying '%s' with Chrome (task=%s)", command, task_id)
|
||||
fallback_reason = _lightpanda_fallback_reason(engine, command, result)
|
||||
if fallback_reason:
|
||||
logger.info(
|
||||
"Lightpanda fallback: retrying '%s' with Chrome (task=%s): %s",
|
||||
command,
|
||||
task_id,
|
||||
fallback_reason,
|
||||
)
|
||||
# For screenshots, use the dedicated Chrome fallback helper
|
||||
# (spins up a separate Chrome session to the same URL).
|
||||
if command == "screenshot":
|
||||
return _chrome_fallback_screenshot(task_id, args or [], timeout)
|
||||
# For other commands, re-run with engine forced to "auto" (Chrome).
|
||||
return _run_browser_command(task_id, command, args, timeout, _engine_override="auto")
|
||||
fallback_result = _chrome_fallback_screenshot(task_id, args or [], timeout)
|
||||
else:
|
||||
fallback_result = _run_chrome_fallback_command(task_id, command, args, timeout)
|
||||
return _annotate_lightpanda_fallback(fallback_result, fallback_reason)
|
||||
|
||||
return result
|
||||
|
||||
|
|
@ -2075,6 +2141,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
|
|||
"url": final_url,
|
||||
"title": title
|
||||
}
|
||||
_copy_fallback_warning(response, result)
|
||||
|
||||
# Detect common "blocked" page patterns from title/url
|
||||
blocked_patterns = [
|
||||
|
|
@ -2117,6 +2184,8 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
|
|||
snapshot_text = _truncate_snapshot(snapshot_text)
|
||||
response["snapshot"] = snapshot_text
|
||||
response["element_count"] = len(refs) if refs else 0
|
||||
if snap_result.get("fallback_warning") and not response.get("fallback_warning"):
|
||||
_copy_fallback_warning(response, snap_result)
|
||||
except Exception as e:
|
||||
logger.debug("Auto-snapshot after navigate failed: %s", e)
|
||||
|
||||
|
|
@ -2173,6 +2242,7 @@ def browser_snapshot(
|
|||
"snapshot": snapshot_text,
|
||||
"element_count": len(refs) if refs else 0
|
||||
}
|
||||
_copy_fallback_warning(response, result)
|
||||
|
||||
# Merge supervisor state (pending dialogs + frame tree) when a CDP
|
||||
# supervisor is attached to this task. No-op otherwise. See
|
||||
|
|
@ -2189,10 +2259,11 @@ def browser_snapshot(
|
|||
|
||||
return json.dumps(response, ensure_ascii=False)
|
||||
else:
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": False,
|
||||
"error": result.get("error", "Failed to get snapshot")
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
|
||||
|
||||
|
||||
def browser_click(ref: str, task_id: Optional[str] = None) -> str:
|
||||
|
|
@ -2219,15 +2290,17 @@ def browser_click(ref: str, task_id: Optional[str] = None) -> str:
|
|||
result = _run_browser_command(effective_task_id, "click", [ref])
|
||||
|
||||
if result.get("success"):
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": True,
|
||||
"clicked": ref
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
|
||||
else:
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": False,
|
||||
"error": result.get("error", f"Failed to click {ref}")
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
|
||||
|
||||
|
||||
def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
|
||||
|
|
@ -2256,16 +2329,18 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
|
|||
result = _run_browser_command(effective_task_id, "fill", [ref, text])
|
||||
|
||||
if result.get("success"):
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": True,
|
||||
"typed": text,
|
||||
"element": ref
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
|
||||
else:
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": False,
|
||||
"error": result.get("error", f"Failed to type into {ref}")
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
|
||||
|
||||
|
||||
def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
|
||||
|
|
@ -2304,15 +2379,17 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
|
|||
|
||||
result = _run_browser_command(effective_task_id, "scroll", [direction, str(_SCROLL_PIXELS)])
|
||||
if not result.get("success"):
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": False,
|
||||
"error": result.get("error", f"Failed to scroll {direction}")
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
|
||||
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": True,
|
||||
"scrolled": direction
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
|
||||
|
||||
|
||||
def browser_back(task_id: Optional[str] = None) -> str:
|
||||
|
|
@ -2334,15 +2411,17 @@ def browser_back(task_id: Optional[str] = None) -> str:
|
|||
|
||||
if result.get("success"):
|
||||
data = result.get("data", {})
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": True,
|
||||
"url": data.get("url", "")
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
|
||||
else:
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": False,
|
||||
"error": result.get("error", "Failed to go back")
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
|
||||
|
||||
|
||||
def browser_press(key: str, task_id: Optional[str] = None) -> str:
|
||||
|
|
@ -2364,15 +2443,17 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
|
|||
result = _run_browser_command(effective_task_id, "press", [key])
|
||||
|
||||
if result.get("success"):
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": True,
|
||||
"pressed": key
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
|
||||
else:
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": False,
|
||||
"error": result.get("error", f"Failed to press {key}")
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
|
||||
|
||||
|
||||
|
||||
|
|
@ -2427,13 +2508,17 @@ def browser_console(clear: bool = False, expression: Optional[str] = None, task_
|
|||
"source": "exception",
|
||||
})
|
||||
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": True,
|
||||
"console_messages": messages,
|
||||
"js_errors": errors,
|
||||
"total_messages": len(messages),
|
||||
"total_errors": len(errors),
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
_copy_fallback_warning(response, console_result)
|
||||
if errors_result.get("fallback_warning") and not response.get("fallback_warning"):
|
||||
_copy_fallback_warning(response, errors_result)
|
||||
return json.dumps(response, ensure_ascii=False)
|
||||
|
||||
|
||||
def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
|
||||
|
|
@ -2448,14 +2533,16 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
|
|||
err = result.get("error", "eval failed")
|
||||
# Detect backend capability gaps and give the model a clear signal
|
||||
if any(hint in err.lower() for hint in ("unknown command", "not supported", "not found", "no such command")):
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": False,
|
||||
"error": f"JavaScript evaluation is not supported by this browser backend. {err}",
|
||||
})
|
||||
return json.dumps({
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result))
|
||||
response = {
|
||||
"success": False,
|
||||
"error": err,
|
||||
})
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result))
|
||||
|
||||
data = result.get("data", {})
|
||||
raw_result = data.get("result")
|
||||
|
|
@ -2469,11 +2556,12 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
|
|||
except (json.JSONDecodeError, ValueError):
|
||||
pass # keep as string
|
||||
|
||||
return json.dumps({
|
||||
response = {
|
||||
"success": True,
|
||||
"result": parsed,
|
||||
"result_type": type(parsed).__name__,
|
||||
}, ensure_ascii=False, default=str)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def _camofox_eval(expression: str, task_id: Optional[str] = None) -> str:
|
||||
|
|
@ -2643,13 +2731,19 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
|||
|
||||
import base64
|
||||
import uuid as uuid_mod
|
||||
from hermes_constants import get_hermes_dir
|
||||
screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
|
||||
screenshot_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
|
||||
# Lightpanda has no graphical renderer — pre-route screenshots to Chrome
|
||||
# via the fallback helper instead of letting the normal path fail with a
|
||||
# CDP error or return a placeholder PNG.
|
||||
# CDP error or return a placeholder PNG. The normal analysis path below
|
||||
# still owns base64 encoding, provider routing, resizing retry, redaction,
|
||||
# and response shape.
|
||||
engine = _get_browser_engine()
|
||||
_lp_prerouted = False
|
||||
_lp_fallback_warning = None
|
||||
if engine == "lightpanda" and _should_inject_engine(engine):
|
||||
logger.debug("browser_vision: pre-routing screenshot to Chrome (engine=lightpanda)")
|
||||
screenshot_args = []
|
||||
|
|
@ -2658,40 +2752,25 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
|||
fb_result = _chrome_fallback_screenshot(
|
||||
effective_task_id, screenshot_args, _get_command_timeout(),
|
||||
)
|
||||
fb_reason = "Lightpanda has no graphical renderer for screenshots; used Chrome for vision capture."
|
||||
fb_result = _annotate_lightpanda_fallback(fb_result, fb_reason)
|
||||
if fb_result.get("success"):
|
||||
# Proceed with the Chrome screenshot for vision analysis
|
||||
_lp_prerouted = True
|
||||
_lp_fallback_warning = fb_result.get("fallback_warning")
|
||||
fb_path = fb_result.get("data", {}).get("path", "")
|
||||
if fb_path and os.path.exists(fb_path):
|
||||
try:
|
||||
with open(fb_path, "rb") as f:
|
||||
image_data = base64.b64encode(f.read()).decode("utf-8")
|
||||
analysis = call_llm(
|
||||
f"Analyze this browser screenshot and answer: {question}",
|
||||
images=[{"data": image_data, "media_type": "image/png"}],
|
||||
task="vision",
|
||||
)
|
||||
from hermes_constants import get_hermes_dir
|
||||
screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
|
||||
screenshots_dir.mkdir(parents=True, exist_ok=True)
|
||||
# Copy to persistent location
|
||||
import shutil as _shutil_vision
|
||||
persistent_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
|
||||
_shutil_vision.copy2(fb_path, persistent_path)
|
||||
return json.dumps({
|
||||
"analysis": analysis,
|
||||
"screenshot_path": str(persistent_path),
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning("Lightpanda Chrome fallback vision failed: %s", e)
|
||||
# Fall through to normal path as last resort
|
||||
# Mark that we already tried the Chrome fallback, so the normal
|
||||
# _run_browser_command path doesn't trigger it a second time.
|
||||
_lp_prerouted = True
|
||||
|
||||
# Save screenshot to persistent location so it can be shared with users
|
||||
from hermes_constants import get_hermes_dir
|
||||
screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
|
||||
screenshot_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
|
||||
from hermes_constants import get_hermes_dir
|
||||
screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
|
||||
screenshots_dir.mkdir(parents=True, exist_ok=True)
|
||||
import shutil as _shutil_vision
|
||||
persistent_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
|
||||
_shutil_vision.copy2(fb_path, persistent_path)
|
||||
screenshot_path = persistent_path
|
||||
else:
|
||||
logger.warning("Lightpanda Chrome fallback vision screenshot failed: %s", fb_result.get("error"))
|
||||
# Fall through to normal path as last resort. Mark that we already
|
||||
# tried Chrome so _run_browser_command doesn't fall back recursively.
|
||||
_lp_prerouted = True
|
||||
|
||||
try:
|
||||
screenshots_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
|
@@ -2699,29 +2778,47 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
|||
# Prune old screenshots (older than 24 hours) to prevent unbounded disk growth
|
||||
_cleanup_old_screenshots(screenshots_dir, max_age_hours=24)
|
||||
|
||||
# Take screenshot using agent-browser
|
||||
screenshot_args = []
|
||||
if annotate:
|
||||
screenshot_args.append("--annotate")
|
||||
screenshot_args.append("--full")
|
||||
screenshot_args.append(str(screenshot_path))
|
||||
result = _run_browser_command(
|
||||
effective_task_id,
|
||||
"screenshot",
|
||||
screenshot_args,
|
||||
# If the Lightpanda pre-route already failed, force Chrome so
|
||||
# _run_browser_command doesn't trigger a redundant LP fallback.
|
||||
_engine_override="auto" if _lp_prerouted else None,
|
||||
)
|
||||
if _lp_prerouted and screenshot_path.exists():
|
||||
result = {
|
||||
"success": True,
|
||||
"data": {
|
||||
"path": str(screenshot_path),
|
||||
"fallback_warning": _lp_fallback_warning,
|
||||
"browser_engine": "chrome",
|
||||
"browser_engine_fallback": {
|
||||
"from": "lightpanda",
|
||||
"to": "chrome",
|
||||
"reason": "Lightpanda has no graphical renderer for screenshots; used Chrome for vision capture.",
|
||||
},
|
||||
},
|
||||
"fallback_warning": _lp_fallback_warning,
|
||||
"browser_engine": "chrome",
|
||||
}
|
||||
else:
|
||||
# Take screenshot using agent-browser
|
||||
screenshot_args = []
|
||||
if annotate:
|
||||
screenshot_args.append("--annotate")
|
||||
screenshot_args.append("--full")
|
||||
screenshot_args.append(str(screenshot_path))
|
||||
result = _run_browser_command(
|
||||
effective_task_id,
|
||||
"screenshot",
|
||||
screenshot_args,
|
||||
# If the Lightpanda pre-route already failed, force Chrome so
|
||||
# _run_browser_command doesn't trigger a redundant LP fallback.
|
||||
_engine_override="auto" if _lp_prerouted else None,
|
||||
)
|
||||
|
||||
if not result.get("success"):
|
||||
error_detail = result.get("error", "Unknown error")
|
||||
_cp = _get_cloud_provider()
|
||||
mode = "local" if _cp is None else f"cloud ({_cp.provider_name()})"
|
||||
return json.dumps({
|
||||
error_response = {
|
||||
"success": False,
|
||||
"error": f"Failed to take screenshot ({mode} mode): {error_detail}"
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
return json.dumps(_copy_fallback_warning(error_response, result), ensure_ascii=False)
|
||||
|
||||
actual_screenshot_path = result.get("data", {}).get("path")
|
||||
if actual_screenshot_path:
|
||||
|
|
@@ -2826,6 +2923,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
|||
"analysis": analysis or "Vision analysis returned no content.",
|
||||
"screenshot_path": str(screenshot_path),
|
||||
}
|
||||
_copy_fallback_warning(response_data, result)
|
||||
# Include annotation data if annotated screenshot was taken
|
||||
if annotate and result.get("data", {}).get("annotations"):
|
||||
response_data["annotations"] = result["data"]["annotations"]
|
||||
|
|
@@ -2841,6 +2939,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
|||
if screenshot_path.exists():
|
||||
error_info["screenshot_path"] = str(screenshot_path)
|
||||
error_info["note"] = "Screenshot was captured but vision analysis failed. You can still share it via MEDIA:<path>."
|
||||
_copy_fallback_warning(error_info, result if 'result' in locals() else {})
|
||||
return json.dumps(error_info, ensure_ascii=False)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@@ -1473,6 +1473,11 @@ def _tool_summary(name: str, result: str, duration_s: float | None) -> str | Non
|
|||
if n is not None:
|
||||
text = f"Extracted {n} {'page' if n == 1 else 'pages'}"
|
||||
|
||||
if isinstance(data, dict) and data.get("fallback_warning"):
|
||||
warning = str(data.get("fallback_warning") or "").strip()
|
||||
if warning:
|
||||
return f"{warning}{suffix}"
|
||||
|
||||
return f"{text}{suffix}" if text else None
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue