hermes-agent/plugins/browser/browser_use/provider.py
kshitijk4poor a15cdfb050 feat(browser): browser-use + firecrawl plugins; drop single-eligible shortcut
Migrates the remaining two cloud browser providers to plugins:

  plugins/browser/browser_use/    — dual auth (direct BROWSER_USE_API_KEY
                                    or managed Nous gateway), idempotency-
                                    key handling for retried managed-mode
                                    creates, x-external-call-id capture.
  plugins/browser/firecrawl/      — direct FIRECRAWL_API_KEY only;
                                    distinct from plugins/web/firecrawl/
                                    (same key, different endpoint).

Also drops the 'single-eligible shortcut' rule from
agent.browser_registry._resolve(). Was a copy-paste from
web_search_registry that would have introduced a real behavior change:
a user with only FIRECRAWL_API_KEY set (for web-extract) would silently
get routed to a paid Firecrawl cloud browser on a fresh install — not
matching origin/main, which only auto-detected between Browser Use and
Browserbase. Third-party browser plugins are subject to the same gate:
they require explicit `browser.cloud_provider` to take effect.

Verified end-to-end via plugin discovery:
  - 3 plugins register (browser-use, browserbase, firecrawl)
  - _resolve(None) with no creds: None (local mode)
  - _resolve(None) with only FIRECRAWL_API_KEY: None (matches main)
  - _resolve('firecrawl'): firecrawl (explicit wins)
  - _resolve(None) with BU+firecrawl: browser-use (legacy walk first hit)
  - _resolve(None) with all three: browser-use (legacy walk order)
2026-05-17 04:04:15 -07:00

305 lines
11 KiB
Python

"""Browser Use cloud browser provider — plugin form.
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
ABC introduced in PR #25214). The legacy in-tree module
``tools.browser_providers.browser_use`` was removed in the same PR; this file
is now the canonical implementation.
Browser Use is the only browser backend with dual auth: a direct
``BROWSER_USE_API_KEY`` for self-billed users, or the managed Nous tool
gateway (which Hermes uses to bill Browser Use sessions to a Nous
subscription). The dispatch order — direct API key first, managed gateway
second — preserves the pre-migration behaviour in
``tools.browser_providers.browser_use.BrowserUseProvider._get_config_or_none``.
Config keys this provider responds to::
browser:
cloud_provider: "browser-use" # explicit selection
tool_gateway:
browser: "gateway" # optional: prefer managed gateway
# even when BROWSER_USE_API_KEY is set
Auth env vars (one of)::
BROWSER_USE_API_KEY=... # https://browser-use.com
# OR a managed Nous gateway entry (configured via 'hermes setup')
"""
from __future__ import annotations
import logging
import os
import threading
import uuid
from typing import Any, Dict, Optional
import requests
from agent.browser_provider import BrowserProvider
logger = logging.getLogger(__name__)
# Idempotency tracking for managed-mode session creation. The managed Nous
# gateway returns 409 "already in progress" on retried POSTs; we forward the
# original idempotency key so the gateway can deduplicate. Cleared on
# success or terminal failure.
_pending_create_keys: Dict[str, str] = {}
_pending_create_keys_lock = threading.Lock()
_BASE_URL = "https://api.browser-use.com/api/v3"
_DEFAULT_MANAGED_TIMEOUT_MINUTES = 5
_DEFAULT_MANAGED_PROXY_COUNTRY_CODE = "us"
def _get_or_create_pending_create_key(task_id: str) -> str:
with _pending_create_keys_lock:
existing = _pending_create_keys.get(task_id)
if existing:
return existing
created = f"browser-use-session-create:{uuid.uuid4().hex}"
_pending_create_keys[task_id] = created
return created
def _clear_pending_create_key(task_id: str) -> None:
with _pending_create_keys_lock:
_pending_create_keys.pop(task_id, None)
def _should_preserve_pending_create_key(response: requests.Response) -> bool:
"""Decide whether to keep the idempotency key after a failed create.
Preserve the key when the failure looks retryable (5xx) OR when the
gateway reports the original request is still in flight (409 "already
in progress") — in either case, retrying with the same key lets the
gateway deduplicate.
Drop the key on any other 4xx (auth failure, bad request, etc.) — those
won't succeed by being retried.
"""
if response.status_code >= 500:
return True
if response.status_code != 409:
return False
try:
payload = response.json()
except Exception:
return False
if not isinstance(payload, dict):
return False
error = payload.get("error")
if not isinstance(error, dict):
return False
message = str(error.get("message") or "").lower()
return "already in progress" in message
class BrowserUseBrowserProvider(BrowserProvider):
"""Browser Use (https://browser-use.com) cloud browser backend.
Dual auth: prefers a direct BROWSER_USE_API_KEY when set, falling back
to the managed Nous tool gateway when ``tool_gateway.browser`` config
routes through it. Setting ``tool_gateway.browser: gateway`` flips the
order so managed billing wins even when BROWSER_USE_API_KEY is present.
"""
@property
def name(self) -> str:
return "browser-use"
@property
def display_name(self) -> str:
return "Browser Use"
def is_available(self) -> bool:
return self._get_config_or_none() is not None
# ------------------------------------------------------------------
# Config resolution (direct API key OR managed Nous gateway)
# ------------------------------------------------------------------
def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
# Import here to avoid a hard dependency at module-import time —
# managed_tool_gateway pulls in the Nous auth stack which can be
# heavy and is not needed for direct-API-key users.
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway
# 1. Direct API key path (unless user explicitly prefers gateway).
api_key = os.environ.get("BROWSER_USE_API_KEY")
if api_key and not prefers_gateway("browser"):
return {
"api_key": api_key,
"base_url": _BASE_URL,
"managed_mode": False,
}
# 2. Managed Nous gateway path.
managed = resolve_managed_tool_gateway("browser-use")
if managed is None:
return None
# Hold reference to managed_nous_tools_enabled so static analysis
# doesn't flag the import as unused — the helper is consulted by
# _get_config() below to compose a more accurate error message.
_ = managed_nous_tools_enabled
return {
"api_key": managed.nous_user_token,
"base_url": managed.gateway_origin.rstrip("/"),
"managed_mode": True,
}
def _get_config(self) -> Dict[str, Any]:
from tools.tool_backend_helpers import managed_nous_tools_enabled
config = self._get_config_or_none()
if config is None:
message = (
"Browser Use requires a direct BROWSER_USE_API_KEY credential."
)
if managed_nous_tools_enabled():
message = (
"Browser Use requires either a direct BROWSER_USE_API_KEY "
"credential or a managed Browser Use gateway configuration."
)
raise ValueError(message)
return config
# ------------------------------------------------------------------
# Session lifecycle
# ------------------------------------------------------------------
def _headers(self, config: Dict[str, Any]) -> Dict[str, str]:
return {
"Content-Type": "application/json",
"X-Browser-Use-API-Key": config["api_key"],
}
def create_session(self, task_id: str) -> Dict[str, object]:
config = self._get_config()
managed_mode = bool(config.get("managed_mode"))
headers = self._headers(config)
if managed_mode:
headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id)
# Keep gateway-backed sessions short so billing authorization does not
# default to a long Browser-Use timeout when Hermes only needs a task-
# scoped ephemeral browser.
payload = (
{
"timeout": _DEFAULT_MANAGED_TIMEOUT_MINUTES,
"proxyCountryCode": _DEFAULT_MANAGED_PROXY_COUNTRY_CODE,
}
if managed_mode
else {}
)
response = requests.post(
f"{config['base_url']}/browsers",
headers=headers,
json=payload,
timeout=30,
)
if not response.ok:
if managed_mode and not _should_preserve_pending_create_key(response):
_clear_pending_create_key(task_id)
raise RuntimeError(
f"Failed to create Browser Use session: "
f"{response.status_code} {response.text}"
)
session_data = response.json()
if managed_mode:
_clear_pending_create_key(task_id)
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
external_call_id = (
response.headers.get("x-external-call-id") if managed_mode else None
)
logger.info("Created Browser Use session %s", session_name)
cdp_url = session_data.get("cdpUrl") or session_data.get("connectUrl") or ""
return {
"session_name": session_name,
"bb_session_id": session_data["id"],
"cdp_url": cdp_url,
"features": {"browser_use": True},
"external_call_id": external_call_id,
}
def close_session(self, session_id: str) -> bool:
try:
config = self._get_config()
except ValueError:
logger.warning(
"Cannot close Browser Use session %s — missing credentials", session_id
)
return False
try:
response = requests.patch(
f"{config['base_url']}/browsers/{session_id}",
headers=self._headers(config),
json={"action": "stop"},
timeout=10,
)
if response.status_code in {200, 201, 204}:
logger.debug("Successfully closed Browser Use session %s", session_id)
return True
else:
logger.warning(
"Failed to close Browser Use session %s: HTTP %s - %s",
session_id,
response.status_code,
response.text[:200],
)
return False
except Exception as e:
logger.error("Exception closing Browser Use session %s: %s", session_id, e)
return False
def emergency_cleanup(self, session_id: str) -> None:
config = self._get_config_or_none()
if config is None:
logger.warning(
"Cannot emergency-cleanup Browser Use session %s — missing credentials",
session_id,
)
return
try:
requests.patch(
f"{config['base_url']}/browsers/{session_id}",
headers=self._headers(config),
json={"action": "stop"},
timeout=5,
)
except Exception as e:
logger.debug(
"Emergency cleanup failed for Browser Use session %s: %s", session_id, e
)
def get_setup_schema(self) -> Dict[str, Any]:
return {
"name": "Browser Use",
"badge": "paid",
"tag": "Cloud browser with remote execution",
"env_vars": [
{
"key": "BROWSER_USE_API_KEY",
"prompt": "Browser Use API key",
"url": "https://browser-use.com",
},
],
"post_setup": "agent_browser",
}