feat(browser): /browser connect — attach browser tools to live Chrome via CDP (#1549)

feat(browser): /browser connect — attach browser tools to live Chrome via CDP
This commit is contained in:
Teknium 2026-03-16 07:32:07 -07:00 committed by GitHub
commit 91b9495b04
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 240 additions and 2 deletions

208
cli.py
View file

@ -1459,6 +1459,8 @@ class HermesCLI:
return "Processing skills command..."
if cmd_lower == "/reload-mcp":
return "Reloading MCP servers..."
if cmd_lower.startswith("/browser"):
return "Configuring browser..."
return "Processing command..."
def _command_spinner_frame(self) -> str:
@ -3294,6 +3296,8 @@ class HermesCLI:
elif cmd_lower == "/reload-mcp":
with self._busy_command(self._slow_command_status(cmd_original)):
self._reload_mcp()
elif cmd_lower.startswith("/browser"):
self._handle_browser_command(cmd_original)
elif cmd_lower == "/plugins":
try:
from hermes_cli.plugins import get_plugin_manager
@ -3533,6 +3537,210 @@ class HermesCLI:
self._background_tasks[task_id] = thread
thread.start()
@staticmethod
def _try_launch_chrome_debug(port: int, system: str) -> bool:
"""Try to launch Chrome/Chromium with remote debugging enabled.
Returns True if a launch command was executed (doesn't guarantee success).
"""
import shutil
import subprocess as _sp
candidates = []
if system == "Darwin":
# macOS: try common app bundle locations
for app in (
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Chromium.app/Contents/MacOS/Chromium",
"/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
):
if os.path.isfile(app):
candidates.append(app)
else:
# Linux: try common binary names
for name in ("google-chrome", "google-chrome-stable", "chromium-browser",
"chromium", "brave-browser", "microsoft-edge"):
path = shutil.which(name)
if path:
candidates.append(path)
if not candidates:
return False
chrome = candidates[0]
try:
_sp.Popen(
[chrome, f"--remote-debugging-port={port}"],
stdout=_sp.DEVNULL,
stderr=_sp.DEVNULL,
start_new_session=True, # detach from terminal
)
return True
except Exception:
return False
def _handle_browser_command(self, cmd: str):
"""Handle /browser connect|disconnect|status — manage live Chrome CDP connection."""
import platform as _plat
import subprocess as _sp
parts = cmd.strip().split(None, 1)
sub = parts[1].lower().strip() if len(parts) > 1 else "status"
_DEFAULT_CDP = "ws://localhost:9222"
current = os.environ.get("BROWSER_CDP_URL", "").strip()
if sub.startswith("connect"):
# Optionally accept a custom CDP URL: /browser connect ws://host:port
connect_parts = cmd.strip().split(None, 2) # ["/browser", "connect", "ws://..."]
cdp_url = connect_parts[2].strip() if len(connect_parts) > 2 else _DEFAULT_CDP
# Clear any existing browser sessions so the next tool call uses the new backend
try:
from tools.browser_tool import cleanup_all_browsers
cleanup_all_browsers()
except Exception:
pass
print()
# Extract port for connectivity checks
_port = 9222
try:
_port = int(cdp_url.rsplit(":", 1)[-1].split("/")[0])
except (ValueError, IndexError):
pass
# Check if Chrome is already listening on the debug port
import socket
_already_open = False
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(1)
s.connect(("127.0.0.1", _port))
s.close()
_already_open = True
except (OSError, socket.timeout):
pass
if _already_open:
print(f" ✓ Chrome is already listening on port {_port}")
elif cdp_url == _DEFAULT_CDP:
# Try to auto-launch Chrome with remote debugging
print(" Chrome isn't running with remote debugging — attempting to launch...")
_launched = self._try_launch_chrome_debug(_port, _plat.system())
if _launched:
# Wait for the port to come up
import time as _time
for _wait in range(10):
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(1)
s.connect(("127.0.0.1", _port))
s.close()
_already_open = True
break
except (OSError, socket.timeout):
_time.sleep(0.5)
if _already_open:
print(f" ✓ Chrome launched and listening on port {_port}")
else:
print(f" ⚠ Chrome launched but port {_port} isn't responding yet")
print(" You may need to close existing Chrome windows first and retry")
else:
print(f" ⚠ Could not auto-launch Chrome")
# Show manual instructions as fallback
sys_name = _plat.system()
if sys_name == "Darwin":
chrome_cmd = 'open -a "Google Chrome" --args --remote-debugging-port=9222'
elif sys_name == "Windows":
chrome_cmd = 'chrome.exe --remote-debugging-port=9222'
else:
chrome_cmd = "google-chrome --remote-debugging-port=9222"
print(f" Launch Chrome manually: {chrome_cmd}")
else:
print(f" ⚠ Port {_port} is not reachable at {cdp_url}")
os.environ["BROWSER_CDP_URL"] = cdp_url
print()
print("🌐 Browser connected to live Chrome via CDP")
print(f" Endpoint: {cdp_url}")
print()
# Inject context message so the model knows
if hasattr(self, '_pending_input'):
self._pending_input.put(
"[System note: The user has connected your browser tools to their live Chrome browser "
"via Chrome DevTools Protocol. Your browser_navigate, browser_snapshot, browser_click, "
"and other browser tools now control their real browser — including any pages they have "
"open, logged-in sessions, and cookies. They likely opened specific sites or logged into "
"services before connecting. Please await their instruction before attempting to operate "
"the browser. When you do act, be mindful that your actions affect their real browser — "
"don't close tabs or navigate away from pages without asking.]"
)
elif sub == "disconnect":
if current:
os.environ.pop("BROWSER_CDP_URL", None)
try:
from tools.browser_tool import cleanup_all_browsers
cleanup_all_browsers()
except Exception:
pass
print()
print("🌐 Browser disconnected from live Chrome")
print(" Browser tools reverted to default mode (local headless or Browserbase)")
print()
if hasattr(self, '_pending_input'):
self._pending_input.put(
"[System note: The user has disconnected the browser tools from their live Chrome. "
"Browser tools are back to default mode (headless local browser or Browserbase cloud).]"
)
else:
print()
print("Browser is not connected to live Chrome (already using default mode)")
print()
elif sub == "status":
print()
if current:
print(f"🌐 Browser: connected to live Chrome via CDP")
print(f" Endpoint: {current}")
_port = 9222
try:
_port = int(current.rsplit(":", 1)[-1].split("/")[0])
except (ValueError, IndexError):
pass
try:
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(1)
s.connect(("127.0.0.1", _port))
s.close()
print(f" Status: ✓ reachable")
except (OSError, Exception):
print(f" Status: ⚠ not reachable (Chrome may not be running)")
elif os.environ.get("BROWSERBASE_API_KEY"):
print("🌐 Browser: Browserbase (cloud)")
else:
print("🌐 Browser: local headless Chromium (agent-browser)")
print()
print(" /browser connect — connect to your live Chrome")
print(" /browser disconnect — revert to default")
print()
else:
print()
print("Usage: /browser connect|disconnect|status")
print()
print(" connect Connect browser tools to your live Chrome session")
print(" disconnect Revert to default browser backend")
print(" status Show current browser mode")
print()
def _handle_skin_command(self, cmd: str):
"""Handle /skin [name] — show or change the display skin."""
try:

View file

@ -48,6 +48,7 @@ COMMANDS_BY_CATEGORY = {
"/skills": "Search, install, inspect, or manage skills from online registries",
"/cron": "Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove)",
"/reload-mcp": "Reload MCP servers from config.yaml",
"/browser": "Connect browser tools to your live Chrome (usage: /browser connect|disconnect|status)",
"/plugins": "List installed plugins and their status",
},
"Info": {

View file

@ -12,7 +12,7 @@ EXPECTED_COMMANDS = {
"/personality", "/clear", "/history", "/new", "/reset", "/retry",
"/undo", "/save", "/config", "/cron", "/skills", "/platforms",
"/verbose", "/reasoning", "/compress", "/title", "/usage", "/insights", "/paste",
"/reload-mcp", "/rollback", "/stop", "/background", "/skin", "/voice", "/quit",
"/reload-mcp", "/rollback", "/stop", "/background", "/skin", "/voice", "/browser", "/quit",
"/plugins",
}

View file

@ -98,6 +98,16 @@ def _get_extraction_model() -> Optional[str]:
return os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
def _get_cdp_override() -> str:
"""Return a user-supplied CDP URL override, or empty string.
When ``BROWSER_CDP_URL`` is set (e.g. via ``/browser connect``), we skip
both Browserbase and the local headless launcher and connect directly to
the supplied Chrome DevTools Protocol endpoint.
"""
return os.environ.get("BROWSER_CDP_URL", "").strip()
def _is_local_mode() -> bool:
"""Return True when no Browserbase credentials are configured.
@ -105,6 +115,8 @@ def _is_local_mode() -> bool:
``agent-browser --session`` instead of connecting to a remote Browserbase
session via ``--cdp``.
"""
if _get_cdp_override():
return False # CDP override takes priority
return not (os.environ.get("BROWSERBASE_API_KEY") and os.environ.get("BROWSERBASE_PROJECT_ID"))
@ -608,6 +620,20 @@ def _create_local_session(task_id: str) -> Dict[str, str]:
}
def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, str]:
"""Create a session that connects to a user-supplied CDP endpoint."""
import uuid
session_name = f"cdp_{uuid.uuid4().hex[:10]}"
logger.info("Created CDP browser session %s%s for task %s",
session_name, cdp_url, task_id)
return {
"session_name": session_name,
"bb_session_id": None,
"cdp_url": cdp_url,
"features": {"cdp_override": True},
}
def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
"""
Get or create session info for the given task.
@ -638,7 +664,10 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
return _active_sessions[task_id]
# Create session outside the lock (network call in cloud mode)
if _is_local_mode():
cdp_override = _get_cdp_override()
if cdp_override:
session_info = _create_cdp_session(task_id, cdp_override)
elif _is_local_mode():
session_info = _create_local_session(task_id)
else:
session_info = _create_browserbase_session(task_id)