fix(browser): surface Lightpanda Chrome fallback warnings

This commit is contained in:
Kshitij Kapoor 2026-05-06 12:49:45 +05:30 committed by kshitij
parent 395dbcc873
commit 3ebdd26449
3 changed files with 504 additions and 208 deletions

View file

@ -157,6 +157,14 @@ class TestNeedsLightpandaFallback:
result = {"success": False, "error": "page.goto: Timeout"} result = {"success": False, "error": "page.goto: Timeout"}
assert _needs_lightpanda_fallback("lightpanda", "open", result) is True assert _needs_lightpanda_fallback("lightpanda", "open", result) is True
def test_failed_command_reason_is_user_visible(self):
    """A failed Lightpanda command yields a reason quoting the error and the Chrome retry."""
    from tools.browser_tool import _lightpanda_fallback_reason

    failed = {"success": False, "error": "page.goto: Timeout"}
    message = _lightpanda_fallback_reason("lightpanda", "open", failed)

    assert message is not None
    # Both the original backend error and the retry notice must be visible.
    for fragment in ("page.goto: Timeout", "retried with Chrome"):
        assert fragment in message
def test_empty_snapshot_triggers_fallback(self): def test_empty_snapshot_triggers_fallback(self):
from tools.browser_tool import _needs_lightpanda_fallback from tools.browser_tool import _needs_lightpanda_fallback
result = {"success": True, "data": {"snapshot": ""}} result = {"success": True, "data": {"snapshot": ""}}
@ -260,6 +268,145 @@ class TestCleanupResetsEngineCache:
assert bt._browser_engine_resolved is False assert bt._browser_engine_resolved is False
# ---------------------------------------------------------------------------
# fallback warning annotation
# ---------------------------------------------------------------------------
class TestLightpandaFallbackWarning:
    """Verify Chrome fallback results are annotated for users."""

    def test_fallback_result_gets_user_visible_warning(self):
        """Annotation adds the warning and engine metadata at top level and under ``data``."""
        from tools.browser_tool import _annotate_lightpanda_fallback

        result = {"success": True, "data": {"snapshot": "- heading \"Hello\" [ref=e1]"}}
        annotated = _annotate_lightpanda_fallback(
            result,
            "Lightpanda returned an empty/too-short snapshot; retried with Chrome.",
        )

        assert annotated["browser_engine"] == "chrome"
        assert "Lightpanda fallback" in annotated["fallback_warning"]
        assert annotated["browser_engine_fallback"] == {
            "from": "lightpanda",
            "to": "chrome",
            "reason": "Lightpanda returned an empty/too-short snapshot; retried with Chrome.",
        }
        assert annotated["data"]["fallback_warning"] == annotated["fallback_warning"]
        assert annotated["data"]["browser_engine"] == "chrome"

    def test_browser_navigate_surfaces_fallback_warning(self):
        """A fallback-annotated ``open`` result is surfaced in the navigate response."""
        import json
        import tools.browser_tool as bt

        result = bt._annotate_lightpanda_fallback(
            {"success": True, "data": {"title": "Fallback OK", "url": "https://example.com/"}},
            "synthetic Lightpanda failure; retried with Chrome.",
        )

        try:
            with patch("tools.browser_tool._is_local_backend", return_value=True), \
                 patch("tools.browser_tool._get_cloud_provider", return_value=None), \
                 patch("tools.browser_tool._get_session_info", return_value={
                     "session_name": "test", "_first_nav": False, "features": {"local": True, "proxies": True}
                 }), \
                 patch("tools.browser_tool._run_browser_command", side_effect=[
                     result,
                     {"success": True, "data": {"snapshot": "- heading \"Fallback OK\" [ref=e1]", "refs": {"e1": {}}}},
                 ]):
                response = json.loads(bt.browser_navigate("https://example.com", task_id="warn-test"))

            assert response["success"] is True
            assert response["browser_engine"] == "chrome"
            assert "Lightpanda fallback" in response["fallback_warning"]
            assert response["browser_engine_fallback"]["from"] == "lightpanda"
            assert response["browser_engine_fallback"]["to"] == "chrome"
        finally:
            # Clean up even when an assertion fails, so any session-key entry
            # left for this task id cannot leak into later tests.
            bt._last_active_session_key.pop("warn-test", None)

    def test_browser_navigate_surfaces_auto_snapshot_fallback_warning(self):
        """A fallback on the auto-snapshot alone still reaches the navigate response."""
        import json
        import tools.browser_tool as bt

        snapshot_result = bt._annotate_lightpanda_fallback(
            {"success": True, "data": {"snapshot": "- heading \"Fallback OK\" [ref=e1]", "refs": {"e1": {}}}},
            "Lightpanda returned an empty/too-short snapshot; retried with Chrome.",
        )

        try:
            with patch("tools.browser_tool._is_local_backend", return_value=True), \
                 patch("tools.browser_tool._get_cloud_provider", return_value=None), \
                 patch("tools.browser_tool._get_session_info", return_value={
                     "session_name": "test", "_first_nav": False, "features": {"local": True, "proxies": True}
                 }), \
                 patch("tools.browser_tool._run_browser_command", side_effect=[
                     {"success": True, "data": {"title": "Fallback OK", "url": "https://example.com/"}},
                     snapshot_result,
                 ]):
                response = json.loads(bt.browser_navigate("https://example.com", task_id="warn-test2"))

            assert response["success"] is True
            assert response["browser_engine"] == "chrome"
            assert "Lightpanda fallback" in response["fallback_warning"]
            assert response["element_count"] == 1
        finally:
            # Clean up even when an assertion fails (see the sibling navigate test).
            bt._last_active_session_key.pop("warn-test2", None)

    def test_failed_fallback_warning_is_preserved_on_click_error(self):
        """The warning survives on the error path when the Chrome retry also fails."""
        import json
        import tools.browser_tool as bt

        result = bt._annotate_lightpanda_fallback(
            {"success": False, "error": "Chrome fallback failed"},
            "Lightpanda 'click' failed (timeout); retried with Chrome.",
        )

        bt._last_active_session_key["warn-test3"] = "warn-test3"
        try:
            with patch("tools.browser_tool._run_browser_command", return_value=result):
                response = json.loads(bt.browser_click("@e1", task_id="warn-test3"))

            assert response["success"] is False
            assert "Lightpanda fallback" in response["fallback_warning"]
            assert response["browser_engine"] == "chrome"
        finally:
            # Always remove the entry seeded above, including on assertion failure.
            bt._last_active_session_key.pop("warn-test3", None)

    def test_browser_vision_lightpanda_uses_chrome_capture_and_normal_call_llm_shape(self, tmp_path):
        """Vision pre-routes the capture to Chrome but calls the LLM with ``messages``."""
        import json
        import tools.browser_tool as bt

        chrome_shot = tmp_path / "chrome.png"
        chrome_shot.write_bytes(b"\x89PNG" + b"0" * 128)

        # Minimal stand-ins exposing only ``choices[0].message.content``.
        class _Msg:
            content = "Example Domain screenshot"

        class _Choice:
            message = _Msg()

        class _Response:
            choices = [_Choice()]

        captured_kwargs = {}

        def fake_call_llm(**kwargs):
            captured_kwargs.update(kwargs)
            return _Response()

        with patch("tools.browser_tool._get_browser_engine", return_value="lightpanda"), \
             patch("tools.browser_tool._should_inject_engine", return_value=True), \
             patch("tools.browser_tool._chrome_fallback_screenshot", return_value={
                 "success": True, "data": {"path": str(chrome_shot)}
             }), \
             patch("hermes_constants.get_hermes_dir", return_value=tmp_path), \
             patch("tools.browser_tool.call_llm", side_effect=fake_call_llm):
            response = json.loads(bt.browser_vision("what is this?", task_id="vision-test"))

        assert response["success"] is True
        assert response["analysis"] == "Example Domain screenshot"
        assert response["browser_engine"] == "chrome"
        assert "Lightpanda fallback" in response["fallback_warning"]
        assert "messages" in captured_kwargs
        assert "images" not in captured_kwargs
        assert captured_kwargs["task"] == "vision"
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# _engine_override parameter # _engine_override parameter
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -361,3 +508,48 @@ class TestEngineOverride:
assert "--engine" in captured_cmds[0] assert "--engine" in captured_cmds[0]
engine_idx = captured_cmds[0].index("--engine") engine_idx = captured_cmds[0].index("--engine")
assert captured_cmds[0][engine_idx + 1] == "lightpanda" assert captured_cmds[0][engine_idx + 1] == "lightpanda"
def test_hybrid_local_sidecar_injects_engine_even_with_cloud_provider(self):
    """A task::local sidecar is local even when global cloud config exists."""
    import tools.browser_tool as bt

    captured_cmds = []
    mock_provider = MagicMock()
    mock_proc = MagicMock()
    mock_proc.wait.return_value = None
    mock_proc.returncode = 0

    def capture_popen(cmd, **kwargs):
        # Record the argv handed to subprocess.Popen so it can be inspected.
        captured_cmds.append(cmd)
        return mock_proc

    mock_stdout = json.dumps({
        "success": True,
        "data": {"snapshot": '- heading "Hello" [ref=e1]', "refs": {"e1": {}}},
    })

    # The engine cache is pinned through ``patch`` rather than by assigning
    # the module globals directly, so the previous values are restored on
    # exit and the forced "lightpanda" engine cannot leak into other tests.
    with patch("tools.browser_tool._cached_browser_engine", "lightpanda"), \
         patch("tools.browser_tool._browser_engine_resolved", True), \
         patch("tools.browser_tool._get_session_info", return_value={"session_name": "local-sidecar"}), \
         patch("tools.browser_tool._find_agent_browser", return_value="/usr/bin/agent-browser"), \
         patch("tools.browser_tool._is_local_mode", return_value=False), \
         patch("tools.browser_tool._chromium_installed", return_value=True), \
         patch("tools.browser_tool._get_cloud_provider", return_value=mock_provider), \
         patch("tools.browser_tool._get_cdp_override", return_value=""), \
         patch("tools.browser_tool._is_camofox_mode", return_value=False), \
         patch("subprocess.Popen", side_effect=capture_popen), \
         patch("os.open", return_value=99), \
         patch("os.close"), \
         patch("os.unlink"), \
         patch("os.makedirs"), \
         patch("builtins.open", MagicMock(return_value=MagicMock(
             __enter__=MagicMock(return_value=MagicMock(read=MagicMock(return_value=mock_stdout))),
             __exit__=MagicMock(return_value=False),
         ))), \
         patch("tools.interrupt.is_interrupted", return_value=False), \
         patch("tools.browser_tool._write_owner_pid"):
        bt._run_browser_command("task::local", "snapshot", [])

    # Exactly one process launch, carrying ``--engine lightpanda``.
    assert len(captured_cmds) == 1
    assert "--engine" in captured_cmds[0]
    assert captured_cmds[0][captured_cmds[0].index("--engine") + 1] == "lightpanda"

View file

@ -555,18 +555,15 @@ def _should_inject_engine(engine: str) -> bool:
return _is_local_mode() return _is_local_mode()
def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]) -> bool: def _lightpanda_fallback_reason(engine: str, command: str, result: Dict[str, Any]) -> Optional[str]:
"""Check if a Lightpanda result should trigger an automatic Chrome fallback. """Return the user-visible reason a Lightpanda result needs Chrome fallback.
Returns True when: ``None`` means no fallback should run. The returned string is copied into
- The engine is lightpanda AND the fallback result so CLI/TUI/gateway users can see when Hermes silently
- The command is fallback-eligible (not close/record) AND switched from Lightpanda to Chrome for completeness.
- The command failed, OR
- A snapshot came back empty/suspiciously short, OR
- A screenshot returned but is likely the Lightpanda placeholder PNG
""" """
if engine != "lightpanda": if engine != "lightpanda":
return False return None
# Only retry commands where Chrome can meaningfully produce a different # Only retry commands where Chrome can meaningfully produce a different
# result. Session-management commands (close, record) are tied to the # result. Session-management commands (close, record) are tied to the
@ -574,11 +571,12 @@ def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]
_FALLBACK_ELIGIBLE = {"open", "snapshot", "screenshot", "eval", "click", _FALLBACK_ELIGIBLE = {"open", "snapshot", "screenshot", "eval", "click",
"fill", "scroll", "back", "press", "console", "errors"} "fill", "scroll", "back", "press", "console", "errors"}
if command not in _FALLBACK_ELIGIBLE: if command not in _FALLBACK_ELIGIBLE:
return False return None
# Explicit failure # Explicit failure
if not result.get("success"): if not result.get("success"):
return True error = str(result.get("error") or "command failed").strip()
return f"Lightpanda {command!r} failed ({error}); retried with Chrome."
data = result.get("data", {}) data = result.get("data", {})
@ -586,7 +584,7 @@ def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]
snap = data.get("snapshot", "") snap = data.get("snapshot", "")
# Empty or near-empty snapshots indicate Lightpanda couldn't render # Empty or near-empty snapshots indicate Lightpanda couldn't render
if not snap or len(snap.strip()) < 20: if not snap or len(snap.strip()) < 20:
return True return "Lightpanda returned an empty/too-short snapshot; retried with Chrome."
if command == "screenshot": if command == "screenshot":
# Lightpanda returns a placeholder PNG with its panda logo. # Lightpanda returns a placeholder PNG with its panda logo.
@ -599,32 +597,79 @@ def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]
if size < 20480: if size < 20480:
logger.debug("Lightpanda screenshot is suspiciously small (%d bytes), " logger.debug("Lightpanda screenshot is suspiciously small (%d bytes), "
"triggering Chrome fallback", size) "triggering Chrome fallback", size)
return True return (
f"Lightpanda screenshot was suspiciously small ({size} bytes); "
"retried with Chrome."
)
except OSError: except OSError:
return True # file doesn't exist or can't be read return "Lightpanda screenshot file was missing/unreadable; retried with Chrome."
return False return None
def _needs_lightpanda_fallback(engine: str, command: str, result: Dict[str, Any]) -> bool:
    """Report whether a Lightpanda result warrants an automatic Chrome retry.

    Boolean view over ``_lightpanda_fallback_reason``: a fallback is needed
    exactly when that helper returns a reason rather than ``None``.
    """
    reason = _lightpanda_fallback_reason(engine, command, result)
    return reason is not None
def _annotate_lightpanda_fallback(result: Dict[str, Any], reason: str) -> Dict[str, Any]:
"""Add a user-visible Chrome fallback warning to a browser command result."""
warning = (
"⚠ Lightpanda fallback: Chrome was used for this browser action. "
f"{reason}"
)
annotated = dict(result)
annotated["fallback_warning"] = warning
annotated["browser_engine"] = "chrome"
annotated["browser_engine_fallback"] = {
"from": "lightpanda",
"to": "chrome",
"reason": reason,
}
data = annotated.get("data")
if isinstance(data, dict):
data = dict(data)
data.setdefault("fallback_warning", warning)
data.setdefault("browser_engine", "chrome")
data.setdefault(
"browser_engine_fallback",
{"from": "lightpanda", "to": "chrome", "reason": reason},
)
annotated["data"] = data
return annotated
def _copy_fallback_warning(target: Dict[str, Any], result: Dict[str, Any]) -> Dict[str, Any]:
"""Copy browser fallback metadata from an internal result into a tool response."""
if result.get("fallback_warning"):
target["fallback_warning"] = result["fallback_warning"]
target["browser_engine"] = result.get("browser_engine")
target["browser_engine_fallback"] = result.get("browser_engine_fallback")
return target
def _run_chrome_fallback_command(
task_id: str, task_id: str,
command: str,
args: List[str], args: List[str],
timeout: int, timeout: int,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Take a screenshot using a temporary Chrome session. """Run a browser command in a temporary Chrome session at the current URL.
When the active session uses Lightpanda, ``--engine chrome`` on the same agent-browser locks the engine when a named daemon starts. Passing
session has no effect the engine is locked at daemon startup. This ``--engine chrome`` to the same Lightpanda ``--session`` cannot change that
helper spins up a **separate** Chrome session, navigates to the same URL running daemon. This helper always uses a fresh temporary Chrome session,
the agent is currently viewing, takes the screenshot, then tears down the navigates it to the current Lightpanda URL, runs ``command``, then tears it
temporary session. down.
Returns the screenshot result dict (same shape as ``_run_browser_command``).
""" """
import uuid import uuid
# 1. Grab the current URL from the Lightpanda session. # 1. Grab the current URL from the Lightpanda session. Use
url_result = _run_browser_command(task_id, "eval", ["window.location.href"], timeout=10) # ``_engine_override=\"auto\"`` so this helper does not recursively trigger
# Lightpanda→Chrome fallback if the eval call itself fails.
url_result = _run_browser_command(
task_id, "eval", ["window.location.href"], timeout=10, _engine_override="auto"
)
current_url = None current_url = None
if url_result.get("success"): if url_result.get("success"):
current_url = url_result.get("data", {}).get("result", "").strip().strip('"').strip("'") current_url = url_result.get("data", {}).get("result", "").strip().strip('"').strip("'")
@ -647,6 +692,9 @@ def _chrome_fallback_screenshot(
browser_env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir} browser_env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir}
browser_env["PATH"] = _merge_browser_path(browser_env.get("PATH", "")) browser_env["PATH"] = _merge_browser_path(browser_env.get("PATH", ""))
if "AGENT_BROWSER_IDLE_TIMEOUT_MS" not in browser_env:
browser_env["AGENT_BROWSER_IDLE_TIMEOUT_MS"] = str(BROWSER_SESSION_INACTIVITY_TIMEOUT * 1000)
def _run_tmp(cmd: str, cmd_args: List[str]) -> Dict[str, Any]: def _run_tmp(cmd: str, cmd_args: List[str]) -> Dict[str, Any]:
full = base_args + [cmd] + cmd_args full = base_args + [cmd] + cmd_args
# Use temp-file stdout/stderr pattern (same as _run_browser_command) # Use temp-file stdout/stderr pattern (same as _run_browser_command)
@ -677,9 +725,9 @@ def _chrome_fallback_screenshot(
except Exception as exc: except Exception as exc:
logger.debug("Chrome fallback tmp cmd '%s' error: %s", cmd, exc) logger.debug("Chrome fallback tmp cmd '%s' error: %s", cmd, exc)
finally: finally:
for p in (stdout_path, stderr_path): for pth in (stdout_path, stderr_path):
try: try:
os.unlink(p) os.unlink(pth)
except OSError: except OSError:
pass pass
return {"success": False, "error": f"Chrome fallback '{cmd}' failed"} return {"success": False, "error": f"Chrome fallback '{cmd}' failed"}
@ -691,9 +739,8 @@ def _chrome_fallback_screenshot(
logger.warning("Chrome fallback: navigate failed: %s", nav.get("error")) logger.warning("Chrome fallback: navigate failed: %s", nav.get("error"))
return {"success": False, "error": f"Chrome fallback navigate failed: {nav.get('error')}"} return {"success": False, "error": f"Chrome fallback navigate failed: {nav.get('error')}"}
# 4. Take the screenshot. # 4. Run the requested command in Chrome.
result = _run_tmp("screenshot", args) return _run_tmp(command, args)
return result
finally: finally:
# 5. Tear down the temporary Chrome session. # 5. Tear down the temporary Chrome session.
@ -706,6 +753,15 @@ def _chrome_fallback_screenshot(
_shutil.rmtree(task_socket_dir, ignore_errors=True) _shutil.rmtree(task_socket_dir, ignore_errors=True)
def _chrome_fallback_screenshot(
    task_id: str,
    args: List[str],
    timeout: int,
) -> Dict[str, Any]:
    """Capture a screenshot through a temporary Chrome session.

    Thin wrapper that delegates to ``_run_chrome_fallback_command`` with the
    command fixed to ``"screenshot"`` and returns its result unchanged.
    """
    return _run_chrome_fallback_command(
        task_id=task_id,
        command="screenshot",
        args=args,
        timeout=timeout,
    )
def _auto_local_for_private_urls() -> bool: def _auto_local_for_private_urls() -> bool:
"""Return whether a cloud-configured install should auto-spawn a local """Return whether a cloud-configured install should auto-spawn a local
Chromium for LAN/localhost URLs. Chromium for LAN/localhost URLs.
@ -1665,9 +1721,12 @@ def _run_browser_command(
# Local mode — launch a headless Chromium instance # Local mode — launch a headless Chromium instance
backend_args = ["--session", session_info["session_name"]] backend_args = ["--session", session_info["session_name"]]
# Lightpanda engine injection (local mode only, agent-browser v0.25.3+) # Lightpanda engine injection (local mode only, agent-browser v0.25.3+).
# Use the resolved session backend rather than global cloud-provider state:
# hybrid private-URL routing can create a local sidecar while a cloud
# provider remains configured for public URLs.
engine = _engine_override or _get_browser_engine() engine = _engine_override or _get_browser_engine()
if _should_inject_engine(engine): if engine != "auto" and not _is_camofox_mode() and not session_info.get("cdp_url"):
backend_args += ["--engine", engine] backend_args += ["--engine", engine]
# Keep concrete executable paths intact, even when they contain spaces. # Keep concrete executable paths intact, even when they contain spaces.
@ -1855,14 +1914,21 @@ def _run_browser_command(
# --- Lightpanda automatic Chrome fallback --- # --- Lightpanda automatic Chrome fallback ---
# If engine is lightpanda and the result looks broken, retry with Chrome. # If engine is lightpanda and the result looks broken, retry with Chrome.
# This runs for ALL exit paths (timeout, empty, non-JSON, nonzero rc, parsed). # This runs for ALL exit paths (timeout, empty, non-JSON, nonzero rc, parsed).
if _needs_lightpanda_fallback(engine, command, result): fallback_reason = _lightpanda_fallback_reason(engine, command, result)
logger.info("Lightpanda fallback: retrying '%s' with Chrome (task=%s)", command, task_id) if fallback_reason:
logger.info(
"Lightpanda fallback: retrying '%s' with Chrome (task=%s): %s",
command,
task_id,
fallback_reason,
)
# For screenshots, use the dedicated Chrome fallback helper # For screenshots, use the dedicated Chrome fallback helper
# (spins up a separate Chrome session to the same URL). # (spins up a separate Chrome session to the same URL).
if command == "screenshot": if command == "screenshot":
return _chrome_fallback_screenshot(task_id, args or [], timeout) fallback_result = _chrome_fallback_screenshot(task_id, args or [], timeout)
# For other commands, re-run with engine forced to "auto" (Chrome). else:
return _run_browser_command(task_id, command, args, timeout, _engine_override="auto") fallback_result = _run_chrome_fallback_command(task_id, command, args, timeout)
return _annotate_lightpanda_fallback(fallback_result, fallback_reason)
return result return result
@ -2075,6 +2141,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
"url": final_url, "url": final_url,
"title": title "title": title
} }
_copy_fallback_warning(response, result)
# Detect common "blocked" page patterns from title/url # Detect common "blocked" page patterns from title/url
blocked_patterns = [ blocked_patterns = [
@ -2117,6 +2184,8 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
snapshot_text = _truncate_snapshot(snapshot_text) snapshot_text = _truncate_snapshot(snapshot_text)
response["snapshot"] = snapshot_text response["snapshot"] = snapshot_text
response["element_count"] = len(refs) if refs else 0 response["element_count"] = len(refs) if refs else 0
if snap_result.get("fallback_warning") and not response.get("fallback_warning"):
_copy_fallback_warning(response, snap_result)
except Exception as e: except Exception as e:
logger.debug("Auto-snapshot after navigate failed: %s", e) logger.debug("Auto-snapshot after navigate failed: %s", e)
@ -2173,6 +2242,7 @@ def browser_snapshot(
"snapshot": snapshot_text, "snapshot": snapshot_text,
"element_count": len(refs) if refs else 0 "element_count": len(refs) if refs else 0
} }
_copy_fallback_warning(response, result)
# Merge supervisor state (pending dialogs + frame tree) when a CDP # Merge supervisor state (pending dialogs + frame tree) when a CDP
# supervisor is attached to this task. No-op otherwise. See # supervisor is attached to this task. No-op otherwise. See
@ -2189,10 +2259,11 @@ def browser_snapshot(
return json.dumps(response, ensure_ascii=False) return json.dumps(response, ensure_ascii=False)
else: else:
return json.dumps({ response = {
"success": False, "success": False,
"error": result.get("error", "Failed to get snapshot") "error": result.get("error", "Failed to get snapshot")
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
def browser_click(ref: str, task_id: Optional[str] = None) -> str: def browser_click(ref: str, task_id: Optional[str] = None) -> str:
@ -2219,15 +2290,17 @@ def browser_click(ref: str, task_id: Optional[str] = None) -> str:
result = _run_browser_command(effective_task_id, "click", [ref]) result = _run_browser_command(effective_task_id, "click", [ref])
if result.get("success"): if result.get("success"):
return json.dumps({ response = {
"success": True, "success": True,
"clicked": ref "clicked": ref
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
else: else:
return json.dumps({ response = {
"success": False, "success": False,
"error": result.get("error", f"Failed to click {ref}") "error": result.get("error", f"Failed to click {ref}")
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str: def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
@ -2256,16 +2329,18 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
result = _run_browser_command(effective_task_id, "fill", [ref, text]) result = _run_browser_command(effective_task_id, "fill", [ref, text])
if result.get("success"): if result.get("success"):
return json.dumps({ response = {
"success": True, "success": True,
"typed": text, "typed": text,
"element": ref "element": ref
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
else: else:
return json.dumps({ response = {
"success": False, "success": False,
"error": result.get("error", f"Failed to type into {ref}") "error": result.get("error", f"Failed to type into {ref}")
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
def browser_scroll(direction: str, task_id: Optional[str] = None) -> str: def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
@ -2304,15 +2379,17 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
result = _run_browser_command(effective_task_id, "scroll", [direction, str(_SCROLL_PIXELS)]) result = _run_browser_command(effective_task_id, "scroll", [direction, str(_SCROLL_PIXELS)])
if not result.get("success"): if not result.get("success"):
return json.dumps({ response = {
"success": False, "success": False,
"error": result.get("error", f"Failed to scroll {direction}") "error": result.get("error", f"Failed to scroll {direction}")
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
return json.dumps({ response = {
"success": True, "success": True,
"scrolled": direction "scrolled": direction
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
def browser_back(task_id: Optional[str] = None) -> str: def browser_back(task_id: Optional[str] = None) -> str:
@ -2334,15 +2411,17 @@ def browser_back(task_id: Optional[str] = None) -> str:
if result.get("success"): if result.get("success"):
data = result.get("data", {}) data = result.get("data", {})
return json.dumps({ response = {
"success": True, "success": True,
"url": data.get("url", "") "url": data.get("url", "")
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
else: else:
return json.dumps({ response = {
"success": False, "success": False,
"error": result.get("error", "Failed to go back") "error": result.get("error", "Failed to go back")
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
def browser_press(key: str, task_id: Optional[str] = None) -> str: def browser_press(key: str, task_id: Optional[str] = None) -> str:
@ -2364,15 +2443,17 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
result = _run_browser_command(effective_task_id, "press", [key]) result = _run_browser_command(effective_task_id, "press", [key])
if result.get("success"): if result.get("success"):
return json.dumps({ response = {
"success": True, "success": True,
"pressed": key "pressed": key
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
else: else:
return json.dumps({ response = {
"success": False, "success": False,
"error": result.get("error", f"Failed to press {key}") "error": result.get("error", f"Failed to press {key}")
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
@ -2427,13 +2508,17 @@ def browser_console(clear: bool = False, expression: Optional[str] = None, task_
"source": "exception", "source": "exception",
}) })
return json.dumps({ response = {
"success": True, "success": True,
"console_messages": messages, "console_messages": messages,
"js_errors": errors, "js_errors": errors,
"total_messages": len(messages), "total_messages": len(messages),
"total_errors": len(errors), "total_errors": len(errors),
}, ensure_ascii=False) }
_copy_fallback_warning(response, console_result)
if errors_result.get("fallback_warning") and not response.get("fallback_warning"):
_copy_fallback_warning(response, errors_result)
return json.dumps(response, ensure_ascii=False)
def _browser_eval(expression: str, task_id: Optional[str] = None) -> str: def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
@ -2448,14 +2533,16 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
err = result.get("error", "eval failed") err = result.get("error", "eval failed")
# Detect backend capability gaps and give the model a clear signal # Detect backend capability gaps and give the model a clear signal
if any(hint in err.lower() for hint in ("unknown command", "not supported", "not found", "no such command")): if any(hint in err.lower() for hint in ("unknown command", "not supported", "not found", "no such command")):
return json.dumps({ response = {
"success": False, "success": False,
"error": f"JavaScript evaluation is not supported by this browser backend. {err}", "error": f"JavaScript evaluation is not supported by this browser backend. {err}",
}) }
return json.dumps({ return json.dumps(_copy_fallback_warning(response, result))
response = {
"success": False, "success": False,
"error": err, "error": err,
}) }
return json.dumps(_copy_fallback_warning(response, result))
data = result.get("data", {}) data = result.get("data", {})
raw_result = data.get("result") raw_result = data.get("result")
@ -2469,11 +2556,12 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
except (json.JSONDecodeError, ValueError): except (json.JSONDecodeError, ValueError):
pass # keep as string pass # keep as string
return json.dumps({ response = {
"success": True, "success": True,
"result": parsed, "result": parsed,
"result_type": type(parsed).__name__, "result_type": type(parsed).__name__,
}, ensure_ascii=False, default=str) }
return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False, default=str)
def _camofox_eval(expression: str, task_id: Optional[str] = None) -> str: def _camofox_eval(expression: str, task_id: Optional[str] = None) -> str:
@ -2643,13 +2731,19 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
import base64 import base64
import uuid as uuid_mod import uuid as uuid_mod
from hermes_constants import get_hermes_dir
screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
screenshot_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
effective_task_id = _last_session_key(task_id or "default") effective_task_id = _last_session_key(task_id or "default")
# Lightpanda has no graphical renderer — pre-route screenshots to Chrome # Lightpanda has no graphical renderer — pre-route screenshots to Chrome
# via the fallback helper instead of letting the normal path fail with a # via the fallback helper instead of letting the normal path fail with a
# CDP error or return a placeholder PNG. # CDP error or return a placeholder PNG. The normal analysis path below
# still owns base64 encoding, provider routing, resizing retry, redaction,
# and response shape.
engine = _get_browser_engine() engine = _get_browser_engine()
_lp_prerouted = False _lp_prerouted = False
_lp_fallback_warning = None
if engine == "lightpanda" and _should_inject_engine(engine): if engine == "lightpanda" and _should_inject_engine(engine):
logger.debug("browser_vision: pre-routing screenshot to Chrome (engine=lightpanda)") logger.debug("browser_vision: pre-routing screenshot to Chrome (engine=lightpanda)")
screenshot_args = [] screenshot_args = []
@ -2658,40 +2752,25 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
fb_result = _chrome_fallback_screenshot( fb_result = _chrome_fallback_screenshot(
effective_task_id, screenshot_args, _get_command_timeout(), effective_task_id, screenshot_args, _get_command_timeout(),
) )
fb_reason = "Lightpanda has no graphical renderer for screenshots; used Chrome for vision capture."
fb_result = _annotate_lightpanda_fallback(fb_result, fb_reason)
if fb_result.get("success"): if fb_result.get("success"):
# Proceed with the Chrome screenshot for vision analysis _lp_prerouted = True
_lp_fallback_warning = fb_result.get("fallback_warning")
fb_path = fb_result.get("data", {}).get("path", "") fb_path = fb_result.get("data", {}).get("path", "")
if fb_path and os.path.exists(fb_path): if fb_path and os.path.exists(fb_path):
try: from hermes_constants import get_hermes_dir
with open(fb_path, "rb") as f: screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
image_data = base64.b64encode(f.read()).decode("utf-8") screenshots_dir.mkdir(parents=True, exist_ok=True)
analysis = call_llm( import shutil as _shutil_vision
f"Analyze this browser screenshot and answer: {question}", persistent_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
images=[{"data": image_data, "media_type": "image/png"}], _shutil_vision.copy2(fb_path, persistent_path)
task="vision", screenshot_path = persistent_path
) else:
from hermes_constants import get_hermes_dir logger.warning("Lightpanda Chrome fallback vision screenshot failed: %s", fb_result.get("error"))
screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots") # Fall through to normal path as last resort. Mark that we already
screenshots_dir.mkdir(parents=True, exist_ok=True) # tried Chrome so _run_browser_command doesn't recursively fallback.
# Copy to persistent location _lp_prerouted = True
import shutil as _shutil_vision
persistent_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
_shutil_vision.copy2(fb_path, persistent_path)
return json.dumps({
"analysis": analysis,
"screenshot_path": str(persistent_path),
})
except Exception as e:
logger.warning("Lightpanda Chrome fallback vision failed: %s", e)
# Fall through to normal path as last resort
# Mark that we already tried the Chrome fallback, so the normal
# _run_browser_command path doesn't trigger it a second time.
_lp_prerouted = True
# Save screenshot to persistent location so it can be shared with users
from hermes_constants import get_hermes_dir
screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
screenshot_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
try: try:
screenshots_dir.mkdir(parents=True, exist_ok=True) screenshots_dir.mkdir(parents=True, exist_ok=True)
@ -2699,29 +2778,47 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
# Prune old screenshots (older than 24 hours) to prevent unbounded disk growth # Prune old screenshots (older than 24 hours) to prevent unbounded disk growth
_cleanup_old_screenshots(screenshots_dir, max_age_hours=24) _cleanup_old_screenshots(screenshots_dir, max_age_hours=24)
# Take screenshot using agent-browser if _lp_prerouted and screenshot_path.exists():
screenshot_args = [] result = {
if annotate: "success": True,
screenshot_args.append("--annotate") "data": {
screenshot_args.append("--full") "path": str(screenshot_path),
screenshot_args.append(str(screenshot_path)) "fallback_warning": _lp_fallback_warning,
result = _run_browser_command( "browser_engine": "chrome",
effective_task_id, "browser_engine_fallback": {
"screenshot", "from": "lightpanda",
screenshot_args, "to": "chrome",
# If the Lightpanda pre-route already failed, force Chrome so "reason": "Lightpanda has no graphical renderer for screenshots; used Chrome for vision capture.",
# _run_browser_command doesn't trigger a redundant LP fallback. },
_engine_override="auto" if _lp_prerouted else None, },
) "fallback_warning": _lp_fallback_warning,
"browser_engine": "chrome",
}
else:
# Take screenshot using agent-browser
screenshot_args = []
if annotate:
screenshot_args.append("--annotate")
screenshot_args.append("--full")
screenshot_args.append(str(screenshot_path))
result = _run_browser_command(
effective_task_id,
"screenshot",
screenshot_args,
# If the Lightpanda pre-route already failed, force Chrome so
# _run_browser_command doesn't trigger a redundant LP fallback.
_engine_override="auto" if _lp_prerouted else None,
)
if not result.get("success"): if not result.get("success"):
error_detail = result.get("error", "Unknown error") error_detail = result.get("error", "Unknown error")
_cp = _get_cloud_provider() _cp = _get_cloud_provider()
mode = "local" if _cp is None else f"cloud ({_cp.provider_name()})" mode = "local" if _cp is None else f"cloud ({_cp.provider_name()})"
return json.dumps({ error_response = {
"success": False, "success": False,
"error": f"Failed to take screenshot ({mode} mode): {error_detail}" "error": f"Failed to take screenshot ({mode} mode): {error_detail}"
}, ensure_ascii=False) }
return json.dumps(_copy_fallback_warning(error_response, result), ensure_ascii=False)
actual_screenshot_path = result.get("data", {}).get("path") actual_screenshot_path = result.get("data", {}).get("path")
if actual_screenshot_path: if actual_screenshot_path:
@ -2826,6 +2923,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
"analysis": analysis or "Vision analysis returned no content.", "analysis": analysis or "Vision analysis returned no content.",
"screenshot_path": str(screenshot_path), "screenshot_path": str(screenshot_path),
} }
_copy_fallback_warning(response_data, result)
# Include annotation data if annotated screenshot was taken # Include annotation data if annotated screenshot was taken
if annotate and result.get("data", {}).get("annotations"): if annotate and result.get("data", {}).get("annotations"):
response_data["annotations"] = result["data"]["annotations"] response_data["annotations"] = result["data"]["annotations"]
@ -2841,6 +2939,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
if screenshot_path.exists(): if screenshot_path.exists():
error_info["screenshot_path"] = str(screenshot_path) error_info["screenshot_path"] = str(screenshot_path)
error_info["note"] = "Screenshot was captured but vision analysis failed. You can still share it via MEDIA:<path>." error_info["note"] = "Screenshot was captured but vision analysis failed. You can still share it via MEDIA:<path>."
_copy_fallback_warning(error_info, result if 'result' in locals() else {})
return json.dumps(error_info, ensure_ascii=False) return json.dumps(error_info, ensure_ascii=False)

View file

@ -1473,6 +1473,11 @@ def _tool_summary(name: str, result: str, duration_s: float | None) -> str | Non
if n is not None: if n is not None:
text = f"Extracted {n} {'page' if n == 1 else 'pages'}" text = f"Extracted {n} {'page' if n == 1 else 'pages'}"
if isinstance(data, dict) and data.get("fallback_warning"):
warning = str(data.get("fallback_warning") or "").strip()
if warning:
return f"{warning}{suffix}"
return f"{text}{suffix}" if text else None return f"{text}{suffix}" if text else None