feat: auto-launch Chromium-family browser for CDP

Add browser CDP launch candidates for Chrome, Chromium, Brave, and Edge while preserving Chrome-first selection. Retry candidate launch failures instead of giving up after the first executable.

Update /browser CLI and TUI messaging, docs, and tool descriptions from Chrome-only wording to Chromium-family browser support. Add regression coverage for Brave/Edge paths, Chrome-first precedence, fallback launches, and CDP endpoint probing.
This commit is contained in:
H-Ali13381 2026-05-07 21:40:05 -04:00 committed by Teknium
parent 340d2b6de0
commit 697d38a3f4
19 changed files with 373 additions and 149 deletions

88
cli.py
View file

@ -105,6 +105,7 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧
from hermes_constants import get_hermes_home, display_hermes_home
from hermes_cli.browser_connect import (
DEFAULT_BROWSER_CDP_URL,
is_browser_debug_ready,
manual_chrome_debug_command,
try_launch_chrome_debug,
)
@ -8454,10 +8455,10 @@ class HermesCLI:
@staticmethod
def _try_launch_chrome_debug(port: int, system: str) -> bool:
"""Try to launch Chrome/Chromium with remote debugging enabled.
"""Try to launch a Chromium-family browser with remote debugging enabled.
Uses a dedicated user-data-dir so the debug instance doesn't conflict
with an already-running Chrome using the default profile.
with an already-running browser using the default profile.
Returns True if a launch command was executed (doesn't guarantee success).
"""
@ -8502,7 +8503,7 @@ class HermesCLI:
)
def _handle_browser_command(self, cmd: str):
"""Handle /browser connect|disconnect|status — manage live Chrome CDP connection."""
"""Handle /browser connect|disconnect|status — manage live Chromium-family CDP connection."""
import platform as _plat
parts = cmd.strip().split(None, 1)
@ -8556,56 +8557,42 @@ class HermesCLI:
print()
# Check if Chrome is already listening on the debug port
import socket
_already_open = False
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(1)
s.connect((_host, _port))
s.close()
_already_open = True
except (OSError, socket.timeout):
pass
# Check if a Chromium-family browser is already serving CDP on the debug port
_already_open = is_browser_debug_ready(cdp_url, timeout=1.0)
if _already_open:
print(f" ✓ Chrome is already listening on port {_port}")
print(f" ✓ Chromium-family browser is already listening on port {_port}")
elif cdp_url == _DEFAULT_CDP:
# Try to auto-launch Chrome with remote debugging
print(" Chrome isn't running with remote debugging — attempting to launch...")
# Try to auto-launch a Chromium-family browser with remote debugging
print(" Chromium-family browser isn't running with remote debugging — attempting to launch...")
_launched = self._try_launch_chrome_debug(_port, _plat.system())
if _launched:
# Wait for the port to come up
# Wait for the DevTools discovery endpoint to come up
for _wait in range(10):
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(1)
s.connect((_host, _port))
s.close()
if is_browser_debug_ready(cdp_url, timeout=1.0):
_already_open = True
break
except (OSError, socket.timeout):
time.sleep(0.5)
time.sleep(0.5)
if _already_open:
print(f" ✓ Chrome launched and listening on port {_port}")
print(f" ✓ Chromium-family browser launched and listening on port {_port}")
else:
print(f"Chrome launched but port {_port} isn't responding yet")
print(f"Browser launched but port {_port} isn't responding yet")
print(" Try again in a few seconds — the debug instance may still be starting")
else:
print(" ⚠ Could not auto-launch Chrome")
print(" ⚠ Could not auto-launch a Chromium-family browser")
sys_name = _plat.system()
chrome_cmd = manual_chrome_debug_command(_port, sys_name)
if chrome_cmd:
print(f" Launch Chrome manually:")
print(f" Launch a Chromium-family browser manually:")
print(f" {chrome_cmd}")
else:
print(" No Chrome/Chromium executable found in this environment")
print(" No supported Chromium-family browser executable found in this environment")
else:
print(f" ⚠ Port {_port} is not reachable at {cdp_url}")
if not _already_open:
print()
print("Browser not connected — start Chrome with remote debugging and retry /browser connect")
print("Browser not connected — start a Chromium-family browser with remote debugging and retry /browser connect")
print()
return
@ -8618,20 +8605,23 @@ class HermesCLI:
except Exception:
pass
print()
print("🌐 Browser connected to live Chrome via CDP")
print("🌐 Browser connected to live Chromium-family browser via CDP")
print(f" Endpoint: {cdp_url}")
print()
# Inject context message so the model knows
# Inject context message so the model knows this slash command
# intentionally makes the dev/debug CDP browser available for use.
if hasattr(self, '_pending_input'):
self._pending_input.put(
"[System note: The user has connected your browser tools to their live Chrome browser "
"via Chrome DevTools Protocol. Your browser_navigate, browser_snapshot, browser_click, "
"and other browser tools now control their real browser — including any pages they have "
"open, logged-in sessions, and cookies. They likely opened specific sites or logged into "
"services before connecting. Please await their instruction before attempting to operate "
"the browser. When you do act, be mindful that your actions affect their real browser — "
"don't close tabs or navigate away from pages without asking.]"
"[System note: The user invoked /browser connect and connected your browser tools to "
"a Chromium-family dev/debug browser via Chrome DevTools Protocol. "
"Your browser_navigate, browser_snapshot, browser_click, and other browser tools now "
"control that CDP browser. The command itself is a signal that using browser tools for "
"their current browser-related request is expected; do not wait for separate permission "
"just because CDP is connected. This is typically a Hermes-managed isolated debug "
"profile, not the user's main everyday browser. It is still user-visible and may contain "
"pages, logged-in sessions, or cookies in that debug profile, so avoid destructive actions, "
"closing tabs, or navigating away unless the user's task calls for it.]"
)
elif sub == "disconnect":
@ -8644,24 +8634,24 @@ class HermesCLI:
except Exception:
pass
print()
print("🌐 Browser disconnected from live Chrome")
print("🌐 Browser disconnected from live Chromium-family browser")
print(" Browser tools reverted to default mode (local headless or cloud provider)")
print()
if hasattr(self, '_pending_input'):
self._pending_input.put(
"[System note: The user has disconnected the browser tools from their live Chrome. "
"[System note: The user has disconnected the browser tools from their live Chromium-family browser. "
"Browser tools are back to default mode (headless local browser or cloud provider).]"
)
else:
print()
print("Browser is not connected to live Chrome (already using default mode)")
print("Browser is not connected to a live Chromium-family browser (already using default mode)")
print()
elif sub == "status":
print()
if current:
print("🌐 Browser: connected to live Chrome via CDP")
print("🌐 Browser: connected to live Chromium-family browser via CDP")
print(f" Endpoint: {current}")
_port = 9222
@ -8677,7 +8667,7 @@ class HermesCLI:
s.close()
print(" Status: ✓ reachable")
except (OSError, Exception):
print(" Status: ⚠ not reachable (Chrome may not be running)")
print(" Status: ⚠ not reachable (browser may not be running)")
else:
try:
from tools.browser_tool import _get_cloud_provider
@ -8697,13 +8687,13 @@ class HermesCLI:
if engine == "lightpanda":
print("🌐 Browser: local Lightpanda (agent-browser --engine lightpanda)")
print(" ⚡ Lightpanda: faster navigation, no screenshot support")
print(" Automatic Chrome fallback for screenshots and failed commands")
print(" Automatic Chromium fallback for screenshots and failed commands")
elif engine == "chrome":
print("🌐 Browser: local headless Chrome (agent-browser --engine chrome)")
print("🌐 Browser: local headless Chromium (agent-browser --engine chrome)")
else:
print("🌐 Browser: local headless Chromium (agent-browser)")
print()
print(" /browser connect — connect to your live Chrome")
print(" /browser connect — connect to your live Chromium-family browser")
print(" /browser disconnect — revert to default")
print()
@ -8711,7 +8701,7 @@ class HermesCLI:
print()
print("Usage: /browser connect|disconnect|status")
print()
print(" connect Connect browser tools to your live Chrome session")
print(" connect Connect browser tools to your live Chromium-family browser session")
print(" disconnect Revert to default browser backend")
print(" status Show current browser mode")
print()

View file

@ -1,4 +1,4 @@
"""Shared helpers for attaching Hermes to a local Chrome CDP port."""
"""Shared helpers for attaching Hermes to a local Chromium-family CDP port."""
from __future__ import annotations
@ -21,23 +21,53 @@ _DARWIN_APPS = (
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
)
_WINDOWS_INSTALL_PARTS = (
("Google", "Chrome", "Application", "chrome.exe"),
("Chromium", "Application", "chrome.exe"),
("Chromium", "Application", "chromium.exe"),
("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
("Microsoft", "Edge", "Application", "msedge.exe"),
_WINDOWS_BROWSER_GROUPS = (
(("chrome.exe", "chrome"), (("Google", "Chrome", "Application", "chrome.exe"),)),
(
("chromium.exe", "chromium"),
(("Chromium", "Application", "chrome.exe"), ("Chromium", "Application", "chromium.exe")),
),
(("brave.exe", "brave"), (("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),)),
(("msedge.exe", "msedge"), (("Microsoft", "Edge", "Application", "msedge.exe"),)),
)
_LINUX_BIN_NAMES = (
"google-chrome", "google-chrome-stable", "chromium-browser",
"chromium", "brave-browser", "microsoft-edge",
_WINDOWS_BIN_NAMES = tuple(name for names, _ in _WINDOWS_BROWSER_GROUPS for name in names)
_WINDOWS_INSTALL_PARTS = tuple(parts for _, group in _WINDOWS_BROWSER_GROUPS for parts in group)
_LINUX_BROWSER_GROUPS = (
(
("google-chrome", "google-chrome-stable"),
("/opt/google/chrome/chrome", "/usr/bin/google-chrome", "/usr/bin/google-chrome-stable"),
),
(
("chromium-browser", "chromium"),
("/usr/bin/chromium-browser", "/usr/bin/chromium"),
),
(
("brave-browser", "brave-browser-stable", "brave"),
(
"/usr/bin/brave-browser",
"/usr/bin/brave-browser-stable",
"/usr/bin/brave",
"/snap/bin/brave",
"/opt/brave.com/brave/brave-browser",
"/opt/brave.com/brave/brave",
"/opt/brave-bin/brave",
),
),
(
("microsoft-edge", "microsoft-edge-stable", "msedge"),
(
"/usr/bin/microsoft-edge",
"/usr/bin/microsoft-edge-stable",
"/opt/microsoft/msedge/microsoft-edge",
"/opt/microsoft/msedge/msedge",
),
),
)
_WINDOWS_BIN_NAMES = (
"chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
"chrome", "msedge", "brave", "chromium",
)
_LINUX_BIN_NAMES = tuple(name for names, _ in _LINUX_BROWSER_GROUPS for name in names)
_LINUX_INSTALL_PATHS = tuple(path for _, paths in _LINUX_BROWSER_GROUPS for path in paths)
def get_chrome_debug_candidates(system: str) -> list[str]:
@ -53,10 +83,14 @@ def get_chrome_debug_candidates(system: str) -> list[str]:
candidates.append(path)
seen.add(normalized)
def add_install_paths(bases: tuple[str | None, ...]) -> None:
for base in filter(None, bases):
for parts in _WINDOWS_INSTALL_PARTS:
add(os.path.join(base, *parts))
def add_windows_install_paths(
bases: tuple[str | None, ...],
install_groups: tuple[tuple[tuple[str, ...], tuple[tuple[str, ...], ...]], ...],
) -> None:
for _, group in install_groups:
for base in filter(None, bases):
for parts in group:
add(os.path.join(base, *parts))
if system == "Darwin":
for app in _DARWIN_APPS:
@ -64,18 +98,25 @@ def get_chrome_debug_candidates(system: str) -> list[str]:
return candidates
if system == "Windows":
for name in _WINDOWS_BIN_NAMES:
add(shutil.which(name))
add_install_paths((
install_bases = (
os.environ.get("ProgramFiles"),
os.environ.get("ProgramFiles(x86)"),
os.environ.get("LOCALAPPDATA"),
))
)
for names, install_parts in _WINDOWS_BROWSER_GROUPS:
for name in names:
add(shutil.which(name))
for base in filter(None, install_bases):
for parts in install_parts:
add(os.path.join(base, *parts))
return candidates
for name in _LINUX_BIN_NAMES:
add(shutil.which(name))
add_install_paths(("/mnt/c/Program Files", "/mnt/c/Program Files (x86)"))
for names, paths in _LINUX_BROWSER_GROUPS:
for name in names:
add(shutil.which(name))
for path in paths:
add(path)
add_windows_install_paths(("/mnt/c/Program Files", "/mnt/c/Program Files (x86)"), _WINDOWS_BROWSER_GROUPS)
return candidates
@ -92,6 +133,42 @@ def _chrome_debug_args(port: int) -> list[str]:
]
def is_browser_debug_ready(url: str, timeout: float = 1.0) -> bool:
    """Return True when ``url`` exposes a reachable Chrome DevTools endpoint.

    Accepted forms are ``http(s)://host:port``, ``ws(s)://host:port/...`` and a
    bare ``host:port`` (treated as ``http://``).

    * A ``ws``/``wss`` URL whose path starts with ``/devtools/browser/`` is
      only checked with a TCP connect to the host and port.
    * Every other URL is probed over HTTP(S) at ``/json/version`` and then
      ``/json``; any 2xx response counts as ready.

    Args:
        url: CDP endpoint to probe.
        timeout: Per-attempt timeout in seconds, applied to the TCP connect
            and to each HTTP probe separately.

    Returns:
        True if the endpoint answered, False otherwise. Malformed or empty
        input yields False instead of raising.
    """
    import socket
    import urllib.request
    from urllib.parse import urlparse

    # Empty / None input can never be an endpoint; bail out before parsing.
    if not url:
        return False

    try:
        # urlparse() itself raises ValueError for malformed authorities such
        # as an unbalanced IPv6 bracket, and ``.port`` raises for a
        # non-numeric or out-of-range port, so both live in the same guard.
        parsed = urlparse(url if "://" in url else f"http://{url}")
        port = parsed.port or (443 if parsed.scheme in {"https", "wss"} else 80)
    except ValueError:
        return False

    if parsed.scheme in {"ws", "wss"} and parsed.path.startswith("/devtools/browser/"):
        # Browser-level WebSocket URL: only verify that something accepts
        # TCP connections on that host/port.
        if not parsed.hostname:
            return False
        try:
            with socket.create_connection((parsed.hostname, port), timeout=timeout):
                return True
        except OSError:
            return False

    # Map WebSocket schemes onto their HTTP equivalents for discovery probes.
    scheme = {"ws": "http", "wss": "https"}.get(parsed.scheme, parsed.scheme)
    if scheme not in {"http", "https"} or not parsed.netloc:
        return False

    root = f"{scheme}://{parsed.netloc}".rstrip("/")
    for probe in (f"{root}/json/version", f"{root}/json"):
        try:
            with urllib.request.urlopen(probe, timeout=timeout) as resp:
                # Responses without a ``status`` attribute are treated as OK.
                if 200 <= getattr(resp, "status", 200) < 300:
                    return True
        except Exception:
            # Deliberate best-effort: any failure of one probe (refused
            # connection, timeout, HTTP error, bad response) just means
            # "try the next probe", never a crash in the caller.
            continue
    return False
def manual_chrome_debug_command(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> str | None:
system = system or platform.system()
candidates = get_chrome_debug_candidates(system)
@ -126,13 +203,15 @@ def try_launch_chrome_debug(port: int = DEFAULT_BROWSER_CDP_PORT, system: str |
return False
os.makedirs(chrome_debug_data_dir(), exist_ok=True)
try:
subprocess.Popen(
[candidates[0], *_chrome_debug_args(port)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
**_detach_kwargs(system),
)
return True
except Exception:
return False
for candidate in candidates:
try:
subprocess.Popen(
[candidate, *_chrome_debug_args(port)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
**_detach_kwargs(system),
)
return True
except Exception:
continue
return False

View file

@ -187,7 +187,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
aliases=("reload_mcp",)),
CommandDef("reload-skills", "Re-scan ~/.hermes/skills/ for newly installed or removed skills",
"Tools & Skills", aliases=("reload_skills",)),
CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
CommandDef("browser", "Connect browser tools to your live Chromium-family browser via CDP", "Tools & Skills",
cli_only=True, args_hint="[connect|disconnect|status]",
subcommands=("connect", "disconnect", "status")),
CommandDef("plugins", "List installed plugins and their status",

View file

@ -31,7 +31,7 @@ TIPS = [
"/skin changes the CLI theme — try ares, mono, slate, poseidon, or charizard.",
"/statusbar toggles a persistent bar showing model, tokens, context fill %, cost, and duration.",
"/tools disable browser temporarily removes browser tools for the current session.",
"/browser connect attaches browser tools to your running Chrome instance via CDP.",
"/browser connect attaches browser tools to your running Chromium-family browser via CDP.",
"/plugins lists installed plugins and their status.",
"/cron manages scheduled tasks — set up recurring prompts with delivery to any platform.",
"/reload-mcp hot-reloads MCP server configuration without restarting.",
@ -300,7 +300,7 @@ TIPS = [
"Container mode: place .container-mode in HERMES_HOME and the host CLI auto-execs into the container.",
"Ctrl+C has 5 priority tiers: cancel recording → cancel prompts → cancel picker → interrupt agent → exit.",
"Every interrupt during an agent run is logged to ~/.hermes/interrupt_debug.log with timestamps.",
"BROWSER_CDP_URL connects browser tools to any running Chrome — accepts WebSocket, HTTP, or host:port.",
"BROWSER_CDP_URL connects browser tools to any running Chromium-family browser — accepts WebSocket, HTTP, or host:port.",
"BROWSERBASE_ADVANCED_STEALTH=true enables advanced anti-detection with custom Chromium (Scale Plan).",
"The CLI auto-switches to compact mode in terminals narrower than 80 columns.",
"Quick commands support two types: exec (run shell command directly) and alias (redirect to another command).",

View file

@ -1,11 +1,18 @@
"""Tests for CLI browser CDP auto-launch helpers."""
from contextlib import redirect_stdout
from io import StringIO
import os
from queue import Queue
import subprocess
from unittest.mock import patch
from cli import HermesCLI
from hermes_cli.browser_connect import manual_chrome_debug_command
from hermes_cli.browser_connect import (
get_chrome_debug_candidates,
is_browser_debug_ready,
manual_chrome_debug_command,
)
def _assert_chrome_debug_cmd(cmd, expected_chrome, expected_port):
@ -19,7 +26,35 @@ def _assert_chrome_debug_cmd(cmd, expected_chrome, expected_port):
assert "chrome-debug" in user_data_args[0]
class _FakeResponse:
    """Minimal stand-in for the context manager returned by ``urlopen``."""

    # HTTP status reported to the code under test.
    status = 200

    def __enter__(self):
        """Enter the ``with`` block, yielding this fake response."""
        return self

    def __exit__(self, exc_type, exc, tb):
        """Leave the ``with`` block without suppressing any exception."""
        return False
class TestChromeDebugLaunch:
def test_browser_debug_ready_requires_http_cdp_endpoint(self):
    """A successful ``/json/version`` reply marks the endpoint ready on the first probe."""
    probed_urls = []

    def fake_urlopen(url, timeout):
        # Record every probe; only the version endpoint answers.
        probed_urls.append(url)
        if not url.endswith("/json/version"):
            raise OSError("unexpected probe")
        return _FakeResponse()

    with patch("urllib.request.urlopen", side_effect=fake_urlopen):
        ready = is_browser_debug_ready("http://127.0.0.1:9222", timeout=0.1)

    assert ready is True
    assert probed_urls == ["http://127.0.0.1:9222/json/version"]
def test_browser_debug_ready_rejects_non_cdp_listener(self):
    """When every HTTP probe fails, the endpoint is reported as not ready."""
    failing_urlopen = patch("urllib.request.urlopen", side_effect=OSError("not cdp"))
    with failing_urlopen:
        ready = is_browser_debug_ready("http://127.0.0.1:9222", timeout=0.1)
    assert ready is False
def test_windows_launch_uses_browser_found_on_path(self):
captured = {}
@ -72,6 +107,86 @@ class TestChromeDebugLaunch:
assert command is not None
assert command.startswith("/usr/bin/chromium --remote-debugging-port=9222")
def test_linux_candidates_prefer_chrome_before_brave_when_both_exist(self):
    """With Chrome and Brave both on PATH, Chrome is listed and suggested first."""
    chrome = "/usr/bin/google-chrome"
    brave = "/usr/bin/brave-browser"
    on_path = {"google-chrome": chrome, "brave-browser": brave}
    installed = {chrome, brave}

    def fake_which(name):
        return on_path.get(name)

    def fake_isfile(path):
        return path in installed

    with patch("hermes_cli.browser_connect.shutil.which", side_effect=fake_which), \
            patch("hermes_cli.browser_connect.os.path.isfile", side_effect=fake_isfile):
        candidates = get_chrome_debug_candidates("Linux")
        command = manual_chrome_debug_command(9222, "Linux")

    assert candidates[:2] == [chrome, brave]
    assert command is not None
    assert command.startswith(f"{chrome} --remote-debugging-port=9222")
def test_linux_candidates_prefer_chrome_install_path_before_brave_on_path(self):
    """A Chrome install path outranks a Brave binary that is only found via PATH."""
    chrome = "/opt/google/chrome/chrome"
    brave = "/usr/bin/brave-browser"
    installed = {chrome, brave}

    def fake_which(name):
        # Only Brave is discoverable through PATH lookup.
        if name == "brave-browser":
            return brave
        return None

    def fake_isfile(path):
        return path in installed

    with patch("hermes_cli.browser_connect.shutil.which", side_effect=fake_which), \
            patch("hermes_cli.browser_connect.os.path.isfile", side_effect=fake_isfile):
        candidates = get_chrome_debug_candidates("Linux")

    assert candidates[:2] == [chrome, brave]
def test_windows_candidates_prefer_chrome_install_path_before_brave_on_path(self, monkeypatch):
    """On Windows, Chrome under ProgramFiles outranks a Brave binary found via PATH."""
    program_files = r"C:\Program Files"
    chrome = os.path.join(program_files, "Google", "Chrome", "Application", "chrome.exe")
    brave = r"C:\Brave\brave.exe"
    installed = {chrome, brave}

    # Expose exactly one install base so the expected ordering is unambiguous.
    monkeypatch.setenv("ProgramFiles", program_files)
    for unset_name in ("ProgramFiles(x86)", "LOCALAPPDATA"):
        monkeypatch.delenv(unset_name, raising=False)

    def fake_which(name):
        if name == "brave.exe":
            return brave
        return None

    def fake_isfile(path):
        return path in installed

    with patch("hermes_cli.browser_connect.shutil.which", side_effect=fake_which), \
            patch("hermes_cli.browser_connect.os.path.isfile", side_effect=fake_isfile):
        candidates = get_chrome_debug_candidates("Windows")

    assert candidates[:2] == [chrome, brave]
def test_linux_candidates_include_arch_brave_install_path(self):
    """``/opt/brave-bin/brave`` is discovered even when nothing is on PATH."""
    brave = "/opt/brave-bin/brave"

    def fake_isfile(path):
        return path == brave

    with patch("hermes_cli.browser_connect.shutil.which", return_value=None), \
            patch("hermes_cli.browser_connect.os.path.isfile", side_effect=fake_isfile):
        candidates = get_chrome_debug_candidates("Linux")
        command = manual_chrome_debug_command(9222, "Linux")

    assert candidates == [brave]
    assert command is not None
    assert command.startswith(f"{brave} --remote-debugging-port=9222")
def test_linux_candidates_include_official_brave_and_edge_stable_paths(self):
    """The ``-stable`` install paths for Brave and Edge are found, Brave first."""
    brave = "/usr/bin/brave-browser-stable"
    edge = "/usr/bin/microsoft-edge-stable"
    installed = {brave, edge}

    def fake_isfile(path):
        return path in installed

    with patch("hermes_cli.browser_connect.shutil.which", return_value=None), \
            patch("hermes_cli.browser_connect.os.path.isfile", side_effect=fake_isfile):
        candidates = get_chrome_debug_candidates("Linux")

    assert candidates == [brave, edge]
def test_launch_tries_next_browser_when_first_candidate_fails(self):
    """A candidate whose launch raises is skipped and the next one is tried."""
    brave = "/usr/bin/brave-browser"
    chrome = "/usr/bin/google-chrome"
    launched_executables = []

    def fake_popen(cmd, **kwargs):
        executable = cmd[0]
        launched_executables.append(executable)
        if executable == brave:
            raise OSError("broken brave install")
        return object()

    candidates_patch = patch(
        "hermes_cli.browser_connect.get_chrome_debug_candidates",
        return_value=[brave, chrome],
    )
    with candidates_patch, patch("subprocess.Popen", side_effect=fake_popen):
        launched = HermesCLI._try_launch_chrome_debug(9222, "Linux")

    assert launched is True
    assert launched_executables == [brave, chrome]
def test_manual_command_uses_wsl_windows_chrome_when_available(self):
chrome = "/mnt/c/Program Files/Google/Chrome/Application/chrome.exe"
@ -99,3 +214,28 @@ class TestChromeDebugLaunch:
with patch("hermes_cli.browser_connect.shutil.which", return_value=None), \
patch("hermes_cli.browser_connect.os.path.isfile", return_value=False):
assert manual_chrome_debug_command(9222, "Linux") is None
def test_connect_context_note_allows_expected_browser_use(self, monkeypatch):
    """The note queued by ``/browser connect`` authorises browser-tool use.

    It must describe a Chromium-family dev/debug browser, must not ask the
    model to wait for a second permission step, and must not suggest that
    the attached browser is the user's main everyday Chrome profile.
    """
    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)

    # Bypass __init__; the handler only needs the pending-input queue here.
    cli = HermesCLI.__new__(HermesCLI)
    cli._pending_input = Queue()

    with patch("cli.is_browser_debug_ready", return_value=True), \
            patch("tools.browser_tool.cleanup_all_browsers"), \
            patch("tools.browser_tool._ensure_cdp_supervisor"), \
            redirect_stdout(StringIO()):
        cli._handle_browser_command("/browser connect")

    note = cli._pending_input.get_nowait()

    assert "Chromium-family" in note
    assert "dev/debug" in note
    assert "using browser tools for their current browser-related request is expected" in note

    assert "live Chrome browser" not in note
    assert "real browser" not in note
    assert "Please await their instruction" not in note

View file

@ -3914,7 +3914,7 @@ def test_browser_manage_connect_sets_env_and_cleans_twice(monkeypatch):
assert resp["result"]["connected"] is True
assert resp["result"]["url"] == "http://127.0.0.1:9222"
assert resp["result"]["messages"] == ["Chrome is already listening on port 9222"]
assert resp["result"]["messages"] == ["Chromium-family browser is already listening on port 9222"]
assert os.environ.get("BROWSER_CDP_URL") == "http://127.0.0.1:9222"
# First cleanup runs against the OLD env (none here), second against the NEW.
assert cleanup_calls == ["", "http://127.0.0.1:9222"]
@ -3934,7 +3934,7 @@ def test_browser_manage_connect_defaults_to_loopback(monkeypatch):
assert resp["result"]["connected"] is True
assert resp["result"]["url"] == "http://127.0.0.1:9222"
assert resp["result"]["messages"] == ["Chrome is already listening on port 9222"]
assert resp["result"]["messages"] == ["Chromium-family browser is already listening on port 9222"]
assert urls[0] == "http://127.0.0.1:9222/json/version"
@ -3977,10 +3977,10 @@ def test_browser_manage_connect_default_local_reports_launch_hint(monkeypatch):
assert resp["result"]["url"] == "http://127.0.0.1:9222"
assert (
resp["result"]["messages"][0]
== "Chrome isn't running with remote debugging — attempting to launch..."
== "Chromium-family browser isn't running with remote debugging — attempting to launch..."
)
assert any(
"No Chrome/Chromium executable was found" in line
"No supported Chromium-family browser executable was found" in line
for line in resp["result"]["messages"]
)
assert any(
@ -4107,8 +4107,8 @@ def test_browser_manage_connect_default_local_retries_after_launch(monkeypatch):
assert resp["result"]["connected"] is True
assert resp["result"]["url"] == "http://127.0.0.1:9222"
assert resp["result"]["messages"] == [
"Chrome isn't running with remote debugging — attempting to launch...",
"Chrome launched and listening on port 9222",
"Chromium-family browser isn't running with remote debugging — attempting to launch...",
"Chromium-family browser launched and listening on port 9222",
]
assert os.environ["BROWSER_CDP_URL"] == "http://127.0.0.1:9222"

View file

@ -56,7 +56,7 @@ def get_camofox_url() -> str:
def is_camofox_mode() -> bool:
"""True when Camofox backend is configured and no CDP override is active.
When the user has explicitly connected to a live Chrome instance via
When the user has explicitly connected to a live Chromium-family browser via
``/browser connect`` (which sets ``BROWSER_CDP_URL``), the CDP connection
takes priority over Camofox so the browser tools operate on the real
browser instead of being silently routed to the Camofox backend.

View file

@ -358,8 +358,9 @@ def browser_cdp(
if not endpoint:
return tool_error(
"No CDP endpoint is available. Run '/browser connect' to attach "
"to a running Chrome, or set 'browser.cdp_url' in config.yaml. "
"The Camofox backend is REST-only and does not expose CDP.",
"to a running Chrome, Brave, Chromium, or Edge browser, or set "
"'browser.cdp_url' in config.yaml. The Camofox backend is REST-only "
"and does not expose CDP.",
cdp_docs=CDP_DOCS_URL,
)
@ -367,8 +368,8 @@ def browser_cdp(
return tool_error(
f"CDP endpoint is not a WebSocket URL: {endpoint!r}. "
"Expected ws://... or wss://... — the /browser connect "
"resolver should have rewritten this. Check that Chrome is "
"actually listening on the debug port."
"resolver should have rewritten this. Check that a Chromium-family "
"browser is actually listening on the debug port."
)
call_params: Dict[str, Any] = params or {}
@ -431,12 +432,12 @@ BROWSER_CDP_SCHEMA: Dict[str, Any] = {
"browser operations not covered by browser_navigate, browser_click, "
"browser_console, etc.\n\n"
"**Requires a reachable CDP endpoint.** Available when the user has "
"run '/browser connect' to attach to a running Chrome, or when "
"'browser.cdp_url' is set in config.yaml. Not currently wired up for "
"cloud backends (Browserbase, Browser Use, Firecrawl) — those expose "
"CDP per session but live-session routing is a follow-up. Camofox is "
"REST-only and will never support CDP. If the tool is in your toolset "
"at all, a CDP endpoint is already reachable.\n\n"
"run '/browser connect' to attach to a running Chrome, Brave, Chromium, "
"or Edge browser, or when 'browser.cdp_url' is set in config.yaml. "
"Not currently wired up for cloud backends (Browserbase, Browser Use, "
"Firecrawl) — those expose CDP per session but live-session routing is "
"a follow-up. Camofox is REST-only and will never support CDP. If the "
"tool is in your toolset at all, a CDP endpoint is already reachable.\n\n"
f"**CDP method reference:** {CDP_DOCS_URL} — use web_extract on a "
"method's URL (e.g. '/tot/Page/#method-handleJavaScriptDialog') "
"to look up parameters and return shape.\n\n"

View file

@ -6,7 +6,7 @@ accept or dismiss.
Gated on the same ``_browser_cdp_check`` as ``browser_cdp`` so it only
appears when a CDP endpoint is reachable (Browserbase with a
``connectUrl``, local Chrome via ``/browser connect``, or
``connectUrl``, local Chromium-family browser via ``/browser connect``, or
``browser.cdp_url`` set in config).
See ``website/docs/developer-guide/browser-supervisor.md`` for the full
@ -40,7 +40,7 @@ BROWSER_DIALOG_SCHEMA: Dict[str, Any] = {
"happens when a second dialog fires while the first is still open), "
"pass ``dialog_id`` from the snapshot to disambiguate.\n\n"
"**Availability:** only present when a CDP-capable backend is "
"attached — Browserbase sessions, local Chrome via "
"attached — Browserbase sessions, local Chromium-family browser via "
"``/browser connect``, or ``browser.cdp_url`` in config.yaml. "
"Not available on Camofox (REST-only) or the default Playwright "
"local browser (CDP port is hidden)."

View file

@ -6087,17 +6087,17 @@ def _failure_messages(url: str, port: int, system: str) -> list[str]:
command = manual_chrome_debug_command(port, system)
hint = (
["Start Chrome with remote debugging, then retry /browser connect:", command]
["Start a Chromium-family browser with remote debugging, then retry /browser connect:", command]
if command
else [
"No Chrome/Chromium executable was found in this environment.",
f"Install one or start Chrome with --remote-debugging-port={port}, then retry /browser connect.",
"No supported Chromium-family browser executable was found in this environment.",
f"Install one or start a Chromium-family browser with --remote-debugging-port={port}, then retry /browser connect.",
]
)
return [
f"Chrome is not reachable at {url}.",
f"Browser CDP is not reachable at {url}.",
*hint,
"Browser not connected — start Chrome with remote debugging and retry /browser connect",
"Browser not connected — start a Chromium-family browser with remote debugging and retry /browser connect",
]
@ -6183,7 +6183,7 @@ def _browser_connect(rid, params: dict) -> dict:
from hermes_cli.browser_connect import try_launch_chrome_debug
announce(
"Chrome isn't running with remote debugging — attempting to launch..."
"Chromium-family browser isn't running with remote debugging — attempting to launch..."
)
if try_launch_chrome_debug(port, system):
@ -6194,7 +6194,7 @@ def _browser_connect(rid, params: dict) -> dict:
break
if ok:
announce(f"Chrome launched and listening on port {port}")
announce(f"Chromium-family browser launched and listening on port {port}")
else:
for line in _failure_messages(url, port, system)[1:]:
announce(line, level="error")
@ -6204,7 +6204,7 @@ def _browser_connect(rid, params: dict) -> dict:
elif not ok:
return _err(rid, 5031, f"could not reach browser CDP at {url}")
elif _is_default_local_cdp(parsed):
announce(f"Chrome is already listening on port {port}")
announce(f"Chromium-family browser is already listening on port {port}")
normalized = _normalize_cdp_url(parsed)

View file

@ -379,11 +379,11 @@ describe('createGatewayEventHandler', () => {
const handler = createGatewayEventHandler(ctx)
handler({
payload: { message: 'Chrome launched and listening on port 9222' },
payload: { message: 'Chromium-family browser launched and listening on port 9222' },
type: 'browser.progress'
} as any)
expect(ctx.system.sys).toHaveBeenCalledWith('Chrome launched and listening on port 9222')
expect(ctx.system.sys).toHaveBeenCalledWith('Chromium-family browser launched and listening on port 9222')
})
it('annotates gateway.start_timeout with stderr tail lines so users can diagnose without /logs', () => {

View file

@ -387,8 +387,8 @@ describe('createSlashHandler', () => {
Promise.resolve({
connected: false,
messages: [
"Chrome isn't running with remote debugging — attempting to launch...",
'Browser not connected — start Chrome with remote debugging and retry /browser connect'
"Chromium-family browser isn't running with remote debugging — attempting to launch...",
'Browser not connected — start a Chromium-family browser with remote debugging and retry /browser connect'
],
url: 'http://127.0.0.1:9222'
})
@ -397,14 +397,14 @@ describe('createSlashHandler', () => {
const ctx = buildCtx({ gateway: { ...buildGateway(), rpc } })
expect(createSlashHandler(ctx)('/browser connect')).toBe(true)
expect(ctx.transcript.sys).toHaveBeenCalledWith('checking Chrome remote debugging at http://127.0.0.1:9222...')
expect(ctx.transcript.sys).toHaveBeenCalledWith('checking Chromium-family browser remote debugging at http://127.0.0.1:9222...')
await vi.waitFor(() => {
expect(ctx.transcript.sys).toHaveBeenCalledWith(
"Chrome isn't running with remote debugging — attempting to launch..."
"Chromium-family browser isn't running with remote debugging — attempting to launch..."
)
expect(ctx.transcript.sys).toHaveBeenCalledWith(
'Browser not connected — start Chrome with remote debugging and retry /browser connect'
'Browser not connected — start a Chromium-family browser with remote debugging and retry /browser connect'
)
expect(ctx.transcript.sys).not.toHaveBeenCalledWith('browser connect failed')
})

View file

@ -155,7 +155,7 @@ export const opsCommands: SlashCommand[] = [
const url = action === 'connect' ? rest.join(' ').trim() || 'http://127.0.0.1:9222' : undefined
if (url) {
ctx.transcript.sys(`checking Chrome remote debugging at ${url}...`)
ctx.transcript.sys(`checking Chromium-family browser remote debugging at ${url}...`)
}
ctx.gateway
@ -181,7 +181,7 @@ export const opsCommands: SlashCommand[] = [
}
if (r.connected) {
ctx.transcript.sys('Browser connected to live Chrome via CDP')
ctx.transcript.sys('Browser connected to live Chromium-family browser via CDP')
ctx.transcript.sys(`Endpoint: ${r.url || '(url unavailable)'}`)
ctx.transcript.sys('next browser tool call will use this CDP endpoint')
}

View file

@ -217,6 +217,6 @@ Issue planned against `jo-inc/camofox-browser` adding:
Unit tests use an asyncio mock CDP server that speaks enough of the protocol
to exercise all state transitions: attach, enable, navigate, dialog fire,
dialog dismiss, frame attach/detach, child target attach, session teardown.
Real-backend E2E (Browserbase + local Chrome) is manual — exercise via
`/browser connect` to a live Chrome and run the dialog/frame test cases
described above.
Real-backend E2E (Browserbase + local Chromium-family browser) is manual — exercise via
`/browser connect` to a live Chromium-family browser and run the dialog/frame
test cases described above.

View file

@ -46,7 +46,7 @@ Hermes includes full browser automation with multiple backend options for naviga
- **Browserbase** — Managed cloud browsers with anti-bot tooling, CAPTCHA solving, and residential proxies
- **Browser Use** — Alternative cloud browser provider
- **Local Chrome via CDP** — Connect to your running Chrome instance using `/browser connect`
- **Local Chromium-family CDP** — Connect to your running Chrome, Brave, Chromium, or Edge browser using `/browser connect`
- **Local Chromium** — Headless local browser via the `agent-browser` CLI
See [Browser Automation](/docs/user-guide/features/browser) for setup and usage.

View file

@ -85,7 +85,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
|---------|-------------|
| `/tools [list\|disable\|enable] [name...]` | Manage tools: list available tools, or disable/enable specific tools for the current session. Disabling a tool removes it from the agent's toolset and triggers a session reset. |
| `/toolsets` | List available toolsets |
| `/browser [connect\|disconnect\|status]` | Manage local Chrome CDP connection. `connect` attaches browser tools to a running Chrome instance (default: `ws://localhost:9222`). `disconnect` detaches. `status` shows current connection. Auto-launches Chrome if no debugger is detected. |
| `/browser [connect\|disconnect\|status]` | Manage a local Chromium-family CDP connection. `connect` attaches browser tools to a running Chrome, Brave, Chromium, or Edge instance (default: `http://127.0.0.1:9222`). `disconnect` detaches. `status` shows current connection. Auto-launches a supported Chromium-family browser if no debugger is detected. |
| `/skills` | Search, install, inspect, or manage skills from online registries |
| `/cron` | Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove) |
| `/curator` | Background skill maintenance — `status`, `run`, `pin`, `archive`. See [Curator](/docs/user-guide/features/curator). |

View file

@ -1505,11 +1505,11 @@ browser:
command_timeout: 30 # Timeout in seconds for browser commands (screenshot, navigate, etc.)
record_sessions: false # Auto-record browser sessions as WebM videos to ~/.hermes/browser_recordings/
# Optional CDP override — when set, Hermes attaches directly to your own
# Chrome (via /browser connect) rather than starting a headless browser.
# Chromium-family browser (via /browser connect) rather than starting a headless browser.
cdp_url: ""
# Dialog supervisor — controls how native JS dialogs (alert / confirm / prompt)
# are handled when a CDP backend is attached (Browserbase, local Chrome via
# /browser connect). Ignored on Camofox and default local agent-browser mode.
# are handled when a CDP backend is attached (Browserbase, local Chromium-family
# browser via /browser connect). Ignored on Camofox and default local agent-browser mode.
dialog_policy: must_respond # must_respond | auto_dismiss | auto_accept
dialog_timeout_s: 300 # Safety auto-dismiss under must_respond (seconds)
camofox:
@ -1527,7 +1527,7 @@ browser:
See the [browser feature page](./features/browser.md#browser_dialog) for the full dialog workflow.
The browser toolset supports multiple providers. See the [Browser feature page](/docs/user-guide/features/browser) for details on Browserbase, Browser Use, and local Chrome CDP setup.
The browser toolset supports multiple providers. See the [Browser feature page](/docs/user-guide/features/browser) for details on Browserbase, Browser Use, and local Chromium-family CDP setup.
## Timezone

View file

@ -1,6 +1,6 @@
---
title: Browser Automation
description: Control browsers with multiple providers, local Chrome via CDP, or cloud browsers for web interaction, form filling, scraping, and more.
description: Control browsers with multiple providers, local Chromium-family browsers via CDP, or cloud browsers for web interaction, form filling, scraping, and more.
sidebar_label: Browser
sidebar_position: 5
---
@ -13,7 +13,7 @@ Hermes Agent includes a full browser automation toolset with multiple backend op
- **Browser Use cloud mode** via [Browser Use](https://browser-use.com) as an alternative cloud browser provider
- **Firecrawl cloud mode** via [Firecrawl](https://firecrawl.dev) for cloud browsers with built-in scraping
- **Camofox local mode** via [Camofox](https://github.com/jo-inc/camofox-browser) for local anti-detection browsing (Firefox-based fingerprint spoofing)
- **Local Chrome via CDP** — connect browser tools to your own Chrome instance using `/browser connect`
- **Local Chromium-family CDP** — connect browser tools to your own Chrome, Brave, Chromium, or Edge instance using `/browser connect`
- **Local browser mode** via the `agent-browser` CLI and a local Chromium installation
In all modes, the agent can navigate websites, interact with page elements, fill forms, and extract information.
@ -25,7 +25,7 @@ Pages are represented as **accessibility trees** (text-based snapshots), making
Key capabilities:
- **Multi-provider cloud execution** — Browserbase, Browser Use, or Firecrawl — no local browser needed
- **Local Chrome integration** — attach to your running Chrome via CDP for hands-on browsing
- **Local Chromium-family integration** — attach to your running Chrome, Brave, Chromium, or Edge browser via CDP for hands-on browsing
- **Built-in stealth** — random fingerprints, CAPTCHA solving, residential proxies (Browserbase)
- **Session isolation** — each task gets its own browser session
- **Automatic cleanup** — inactive sessions are closed after a timeout
@ -285,9 +285,9 @@ Adoption only fires until `tab_id` is populated for the session. If the external
When Camofox runs in headed mode (with a visible browser window), it exposes a VNC port in its health check response. Hermes automatically discovers this and includes the VNC URL in navigation responses, so the agent can share a link for you to watch the browser live.
### Local Chrome via CDP (`/browser connect`)
### Local Chromium-family browser via CDP (`/browser connect`)
Instead of a cloud provider, you can attach Hermes browser tools to your own running Chrome instance via the Chrome DevTools Protocol (CDP). This is useful when you want to see what the agent is doing in real-time, interact with pages that require your own cookies/sessions, or avoid cloud browser costs.
Instead of a cloud provider, you can attach Hermes browser tools to your own running Chrome, Brave, Chromium, or Edge instance via the Chrome DevTools Protocol (CDP). This is useful when you want to see what the agent is doing in real time, interact with pages that require your own cookies/sessions, or avoid cloud browser costs.
:::note
`/browser connect` is an **interactive-CLI slash command** — it is not dispatched by the gateway. If you try to run it inside a WebUI, Telegram, Discord, or other gateway chat, the message will be sent to the agent as plain text and the command will not execute. Start Hermes from the terminal (`hermes` or `hermes chat`) and issue `/browser connect` there.
@ -296,26 +296,40 @@ Instead of a cloud provider, you can attach Hermes browser tools to your own run
In the CLI, use:
```
/browser connect # Connect to Chrome at ws://localhost:9222
/browser connect # Auto-launch/connect to a local Chromium-family browser at http://127.0.0.1:9222
/browser connect ws://host:port # Connect to a specific CDP endpoint
/browser status # Check current connection
/browser disconnect # Detach and return to cloud/local mode
/browser status # Check current connection
/browser disconnect # Detach and return to cloud/local mode
```
If Chrome isn't already running with remote debugging, Hermes will attempt to auto-launch it with `--remote-debugging-port=9222`.
If a browser isn't already running with remote debugging, Hermes will attempt to auto-launch a supported Chromium-family browser with `--remote-debugging-port=9222`. Detection covers Google Chrome, Chromium, Brave, and Microsoft Edge, including common Linux install paths such as `/opt/brave-bin/brave` and `/snap/bin/brave`. Chrome is preferred when several browsers are installed, and if one candidate fails to launch, Hermes tries the next.
:::tip
To start Chrome manually with CDP enabled, use a dedicated user-data-dir so the debug port actually comes up even if Chrome is already running with your normal profile:
To start a Chromium-family browser manually with CDP enabled, use a dedicated user-data-dir so the debug port actually comes up even if the browser is already running with your normal profile:
```bash
# Linux
# Linux — Brave
brave-browser \
--remote-debugging-port=9222 \
--user-data-dir="$HOME/.hermes/chrome-debug" \
--no-first-run \
--no-default-browser-check &
# Linux — Google Chrome
google-chrome \
--remote-debugging-port=9222 \
--user-data-dir="$HOME/.hermes/chrome-debug" \
--no-first-run \
--no-default-browser-check &
# macOS
# macOS — Brave
"/Applications/Brave Browser.app/Contents/MacOS/Brave Browser" \
--remote-debugging-port=9222 \
--user-data-dir="$HOME/.hermes/chrome-debug" \
--no-first-run \
--no-default-browser-check &
# macOS — Google Chrome
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \
--remote-debugging-port=9222 \
--user-data-dir="$HOME/.hermes/chrome-debug" \
@ -325,10 +339,10 @@ google-chrome \
Then launch the Hermes CLI and run `/browser connect`.
**Why `--user-data-dir`?** Without it, launching Chrome while a regular Chrome instance is already running typically opens a new window on the existing process — and that existing process was not started with `--remote-debugging-port`, so port 9222 never opens. A dedicated user-data-dir forces a fresh Chrome process where the debug port actually listens. `--no-first-run --no-default-browser-check` skips the first-launch wizard for the fresh profile.
**Why `--user-data-dir`?** Without it, launching a Chromium-family browser while a regular instance is already running typically opens a new window on the existing process — and that existing process was not started with `--remote-debugging-port`, so port 9222 never opens. A dedicated user-data-dir forces a fresh browser process where the debug port actually listens. `--no-first-run --no-default-browser-check` skips the first-launch wizard for the fresh profile.
:::
When connected via CDP, all browser tools (`browser_navigate`, `browser_click`, etc.) operate on your live Chrome instance instead of spinning up a cloud session.
When connected via CDP, all browser tools (`browser_navigate`, `browser_click`, etc.) operate on your live browser instance instead of spinning up a cloud session.
### WSL2 + Windows Chrome: prefer MCP over `/browser connect`
@ -489,7 +503,7 @@ When a CDP supervisor is active for the current session (typical for any session
Raw Chrome DevTools Protocol passthrough — the escape hatch for browser operations not covered by the other tools. Use for native dialog handling, iframe-scoped evaluation, cookie/network control, or any CDP verb the agent needs.
**Only available when a CDP endpoint is reachable at session start** — meaning `/browser connect` has attached to a running Chrome, or `browser.cdp_url` is set in `config.yaml`. The default local agent-browser mode, Camofox, and cloud providers (Browserbase, Browser Use, Firecrawl) do not currently expose CDP to this tool — cloud providers have per-session CDP URLs but live-session routing is a follow-up.
**Only available when a CDP endpoint is reachable at session start** — meaning `/browser connect` has attached to a running Chrome, Brave, Chromium, or Edge browser, or `browser.cdp_url` is set in `config.yaml`. The default local agent-browser mode, Camofox, and cloud providers (Browserbase, Browser Use, Firecrawl) do not currently expose CDP to this tool — cloud providers have per-session CDP URLs but live-session routing is a follow-up.
**CDP method reference:** https://chromedevtools.github.io/devtools-protocol/ — the agent can `web_extract` a specific method's page to look up parameters and return shape.

View file

@ -28,7 +28,7 @@ Hermes Agent includes a rich set of capabilities that extend far beyond basic ch
## Media & Web
- **[Voice Mode](voice-mode.md)** — Full voice interaction across CLI and messaging platforms. Talk to the agent using your microphone, hear spoken replies, and have live voice conversations in Discord voice channels.
- **[Browser Automation](browser.md)** — Full browser automation with multiple backends: Browserbase cloud, Browser Use cloud, local Chrome via CDP, or local Chromium. Navigate websites, fill forms, and extract information.
- **[Browser Automation](browser.md)** — Full browser automation with multiple backends: Browserbase cloud, Browser Use cloud, local Chrome/Brave/Chromium/Edge via CDP, or local Chromium. Navigate websites, fill forms, and extract information.
- **[Vision & Image Paste](vision.md)** — Multimodal vision support. Paste images from your clipboard into the CLI and ask the agent to analyze, describe, or work with them using any vision-capable model.
- **[Image Generation](image-generation.md)** — Generate images from text prompts using FAL.ai. Nine models supported (FLUX 2 Klein/Pro, GPT-Image 1.5/2, Nano Banana Pro, Ideogram V3, Recraft V4 Pro, Qwen, Z-Image Turbo); pick one via `hermes tools`.
- **[Voice & TTS](tts.md)** — Text-to-speech output and voice message transcription across all messaging platforms, with ten native provider options: Edge TTS (free), ElevenLabs, OpenAI TTS, MiniMax, Mistral Voxtral, Google Gemini, xAI, NeuTTS, KittenTTS, and Piper — plus custom command providers for any local TTS CLI.