feat(browser): browser-use + firecrawl plugins; drop single-eligible shortcut

Migrates the remaining two cloud browser providers to plugins:

  plugins/browser/browser_use/    — dual auth (direct BROWSER_USE_API_KEY
                                    or managed Nous gateway), idempotency-
                                    key handling for retried managed-mode
                                    creates, x-external-call-id capture.
  plugins/browser/firecrawl/      — direct FIRECRAWL_API_KEY only;
                                    distinct from plugins/web/firecrawl/
                                    (same key, different endpoint).

Also drops the 'single-eligible shortcut' rule from
agent.browser_registry._resolve(). Was a copy-paste from
web_search_registry that would have introduced a real behavior change:
a user with only FIRECRAWL_API_KEY set (for web-extract) would silently
get routed to a paid Firecrawl cloud browser on a fresh install — not
matching origin/main, which only auto-detected between Browser Use and
Browserbase. Third-party browser plugins are subject to the same gate:
they require explicit `browser.cloud_provider` to take effect.

Verified end-to-end via plugin discovery:
  - 3 plugins register (browser-use, browserbase, firecrawl)
  - _resolve(None) with no creds: None (local mode)
  - _resolve(None) with only FIRECRAWL_API_KEY: None (matches main)
  - _resolve('firecrawl'): firecrawl (explicit wins)
  - _resolve(None) with BU+firecrawl: browser-use (legacy walk first hit)
  - _resolve(None) with all three: browser-use (legacy walk order)
This commit is contained in:
kshitijk4poor 2026-05-14 14:11:48 +05:30 committed by Teknium
parent b8138ac405
commit a15cdfb050
7 changed files with 530 additions and 12 deletions

View file

@ -12,8 +12,7 @@ Active selection
The active provider is chosen by configuration with this precedence:
1. ``browser.cloud_provider`` in ``config.yaml`` (explicit override).
2. If exactly one registered provider is available, use it.
3. Legacy preference order ``browser-use`` ``browserbase`` filtered by
2. Legacy preference order ``browser-use`` ``browserbase`` filtered by
availability. Matches the historic auto-detect order in
:func:`tools.browser_tool._get_cloud_provider` (Browser Use checked first
because it covers both the managed Nous gateway and direct API key path;
@ -22,7 +21,7 @@ The active provider is chosen by configuration with this precedence:
cloud browser when they explicitly set ``browser.cloud_provider:
firecrawl``, matching pre-migration behaviour where Firecrawl was never
auto-selected.
4. Otherwise ``None`` the dispatcher falls back to local browser mode.
3. Otherwise ``None`` the dispatcher falls back to local browser mode.
The explicit-config branch (rule 1) intentionally ignores ``is_available()``
so the dispatcher surfaces a typed "X_API_KEY is not set" error to the user
@ -132,12 +131,22 @@ def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]:
:meth:`is_available` returns False the dispatcher will surface a
precise "X_API_KEY is not set" error instead of silently routing
somewhere else.
3. **Single-provider shortcut.** When only one registered provider
reports ``is_available() == True``, return it.
4. **Legacy preference walk, filtered by availability.** Walk
3. **Legacy preference walk, filtered by availability.** Walk
:data:`_LEGACY_PREFERENCE` (``browser-use`` ``browserbase``) looking
for a provider whose ``is_available()`` is True.
There is intentionally NO "single-eligible shortcut" rule here (unlike
:func:`agent.web_search_registry._resolve`). Pre-migration, the
auto-detect branch in ``tools.browser_tool._get_cloud_provider`` only
considered Browser Use and Browserbase; Firecrawl was reachable only
via an explicit ``browser.cloud_provider: firecrawl`` config key.
Preserving that gate matters because Firecrawl shares its API key with
the *web* extract plugin (``plugins/web/firecrawl/``), so users who set
``FIRECRAWL_API_KEY`` for web extract must NOT get silently routed to a
paid cloud browser on a fresh install. Third-party browser-provider
plugins added under ``~/.hermes/plugins/browser/<vendor>/`` are subject
to the same gate they must be explicitly configured to take effect.
Returns None when no provider is configured AND no available provider
matches the legacy preference; the dispatcher then falls back to local
browser mode.
@ -170,12 +179,10 @@ def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]:
configured,
)
# 3. + 4. Auto-detect path — filter by availability so we don't surface
# a provider the user has no credentials for.
eligible = [p for p in snapshot.values() if _is_available_safe(p)]
if len(eligible) == 1:
return eligible[0]
# 3. Legacy preference walk — only providers in _LEGACY_PREFERENCE are
# auto-eligible. Filtered by availability so we don't surface a
# provider the user has no credentials for. See docstring for why
# we do NOT fall back to "any single-eligible registered provider".
for legacy in _LEGACY_PREFERENCE:
provider = snapshot.get(legacy)
if provider is not None and _is_available_safe(provider):

View file

@ -0,0 +1,14 @@
"""Browser Use cloud browser plugin — bundled, auto-loaded.
Mirrors the ``plugins/web/<vendor>/`` layout: ``provider.py`` holds the
provider class; ``__init__.py::register`` instantiates and registers it.
"""
from __future__ import annotations
from plugins.browser.browser_use.provider import BrowserUseBrowserProvider
def register(ctx) -> None:
"""Register the Browser Use provider with the plugin context."""
ctx.register_browser_provider(BrowserUseBrowserProvider())

View file

@ -0,0 +1,7 @@
name: browser-browser-use
version: 1.0.0
description: "Browser Use (https://browser-use.com) cloud browser backend. Supports both direct BROWSER_USE_API_KEY and the managed Nous tool gateway. Also powers the 'Nous Subscription' UX flow that bills usage to a Nous subscription."
author: NousResearch
kind: backend
provides_browser_providers:
- browser-use

View file

@ -0,0 +1,305 @@
"""Browser Use cloud browser provider — plugin form.
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
ABC introduced in PR #25214). The legacy in-tree module
``tools.browser_providers.browser_use`` was removed in the same PR; this file
is now the canonical implementation.
Browser Use is the only browser backend with dual auth: a direct
``BROWSER_USE_API_KEY`` for self-billed users, or the managed Nous tool
gateway (which Hermes uses to bill Browser Use sessions to a Nous
subscription). The dispatch order direct API key first, managed gateway
second preserves the pre-migration behaviour in
``tools.browser_providers.browser_use.BrowserUseProvider._get_config_or_none``.
Config keys this provider responds to::
browser:
cloud_provider: "browser-use" # explicit selection
tool_gateway:
browser: "gateway" # optional: prefer managed gateway
# even when BROWSER_USE_API_KEY is set
Auth env vars (one of)::
BROWSER_USE_API_KEY=... # https://browser-use.com
# OR a managed Nous gateway entry (configured via 'hermes setup')
"""
from __future__ import annotations
import logging
import os
import threading
import uuid
from typing import Any, Dict, Optional
import requests
from agent.browser_provider import BrowserProvider
logger = logging.getLogger(__name__)
# Idempotency tracking for managed-mode session creation. The managed Nous
# gateway returns 409 "already in progress" on retried POSTs; we forward the
# original idempotency key so the gateway can deduplicate. Cleared on
# success or terminal failure.
_pending_create_keys: Dict[str, str] = {}
_pending_create_keys_lock = threading.Lock()
_BASE_URL = "https://api.browser-use.com/api/v3"
_DEFAULT_MANAGED_TIMEOUT_MINUTES = 5
_DEFAULT_MANAGED_PROXY_COUNTRY_CODE = "us"
def _get_or_create_pending_create_key(task_id: str) -> str:
with _pending_create_keys_lock:
existing = _pending_create_keys.get(task_id)
if existing:
return existing
created = f"browser-use-session-create:{uuid.uuid4().hex}"
_pending_create_keys[task_id] = created
return created
def _clear_pending_create_key(task_id: str) -> None:
with _pending_create_keys_lock:
_pending_create_keys.pop(task_id, None)
def _should_preserve_pending_create_key(response: requests.Response) -> bool:
"""Decide whether to keep the idempotency key after a failed create.
Preserve the key when the failure looks retryable (5xx) OR when the
gateway reports the original request is still in flight (409 "already
in progress") — in either case, retrying with the same key lets the
gateway deduplicate.
Drop the key on any other 4xx (auth failure, bad request, etc.) those
won't succeed by being retried.
"""
if response.status_code >= 500:
return True
if response.status_code != 409:
return False
try:
payload = response.json()
except Exception:
return False
if not isinstance(payload, dict):
return False
error = payload.get("error")
if not isinstance(error, dict):
return False
message = str(error.get("message") or "").lower()
return "already in progress" in message
class BrowserUseBrowserProvider(BrowserProvider):
"""Browser Use (https://browser-use.com) cloud browser backend.
Dual auth: prefers a direct BROWSER_USE_API_KEY when set, falling back
to the managed Nous tool gateway when ``tool_gateway.browser`` config
routes through it. Setting ``tool_gateway.browser: gateway`` flips the
order so managed billing wins even when BROWSER_USE_API_KEY is present.
"""
@property
def name(self) -> str:
return "browser-use"
@property
def display_name(self) -> str:
return "Browser Use"
def is_available(self) -> bool:
return self._get_config_or_none() is not None
# ------------------------------------------------------------------
# Config resolution (direct API key OR managed Nous gateway)
# ------------------------------------------------------------------
def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
# Import here to avoid a hard dependency at module-import time —
# managed_tool_gateway pulls in the Nous auth stack which can be
# heavy and is not needed for direct-API-key users.
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway
# 1. Direct API key path (unless user explicitly prefers gateway).
api_key = os.environ.get("BROWSER_USE_API_KEY")
if api_key and not prefers_gateway("browser"):
return {
"api_key": api_key,
"base_url": _BASE_URL,
"managed_mode": False,
}
# 2. Managed Nous gateway path.
managed = resolve_managed_tool_gateway("browser-use")
if managed is None:
return None
# Hold reference to managed_nous_tools_enabled so static analysis
# doesn't flag the import as unused — the helper is consulted by
# _get_config() below to compose a more accurate error message.
_ = managed_nous_tools_enabled
return {
"api_key": managed.nous_user_token,
"base_url": managed.gateway_origin.rstrip("/"),
"managed_mode": True,
}
def _get_config(self) -> Dict[str, Any]:
from tools.tool_backend_helpers import managed_nous_tools_enabled
config = self._get_config_or_none()
if config is None:
message = (
"Browser Use requires a direct BROWSER_USE_API_KEY credential."
)
if managed_nous_tools_enabled():
message = (
"Browser Use requires either a direct BROWSER_USE_API_KEY "
"credential or a managed Browser Use gateway configuration."
)
raise ValueError(message)
return config
# ------------------------------------------------------------------
# Session lifecycle
# ------------------------------------------------------------------
def _headers(self, config: Dict[str, Any]) -> Dict[str, str]:
return {
"Content-Type": "application/json",
"X-Browser-Use-API-Key": config["api_key"],
}
def create_session(self, task_id: str) -> Dict[str, object]:
config = self._get_config()
managed_mode = bool(config.get("managed_mode"))
headers = self._headers(config)
if managed_mode:
headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id)
# Keep gateway-backed sessions short so billing authorization does not
# default to a long Browser-Use timeout when Hermes only needs a task-
# scoped ephemeral browser.
payload = (
{
"timeout": _DEFAULT_MANAGED_TIMEOUT_MINUTES,
"proxyCountryCode": _DEFAULT_MANAGED_PROXY_COUNTRY_CODE,
}
if managed_mode
else {}
)
response = requests.post(
f"{config['base_url']}/browsers",
headers=headers,
json=payload,
timeout=30,
)
if not response.ok:
if managed_mode and not _should_preserve_pending_create_key(response):
_clear_pending_create_key(task_id)
raise RuntimeError(
f"Failed to create Browser Use session: "
f"{response.status_code} {response.text}"
)
session_data = response.json()
if managed_mode:
_clear_pending_create_key(task_id)
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
external_call_id = (
response.headers.get("x-external-call-id") if managed_mode else None
)
logger.info("Created Browser Use session %s", session_name)
cdp_url = session_data.get("cdpUrl") or session_data.get("connectUrl") or ""
return {
"session_name": session_name,
"bb_session_id": session_data["id"],
"cdp_url": cdp_url,
"features": {"browser_use": True},
"external_call_id": external_call_id,
}
def close_session(self, session_id: str) -> bool:
try:
config = self._get_config()
except ValueError:
logger.warning(
"Cannot close Browser Use session %s — missing credentials", session_id
)
return False
try:
response = requests.patch(
f"{config['base_url']}/browsers/{session_id}",
headers=self._headers(config),
json={"action": "stop"},
timeout=10,
)
if response.status_code in {200, 201, 204}:
logger.debug("Successfully closed Browser Use session %s", session_id)
return True
else:
logger.warning(
"Failed to close Browser Use session %s: HTTP %s - %s",
session_id,
response.status_code,
response.text[:200],
)
return False
except Exception as e:
logger.error("Exception closing Browser Use session %s: %s", session_id, e)
return False
def emergency_cleanup(self, session_id: str) -> None:
config = self._get_config_or_none()
if config is None:
logger.warning(
"Cannot emergency-cleanup Browser Use session %s — missing credentials",
session_id,
)
return
try:
requests.patch(
f"{config['base_url']}/browsers/{session_id}",
headers=self._headers(config),
json={"action": "stop"},
timeout=5,
)
except Exception as e:
logger.debug(
"Emergency cleanup failed for Browser Use session %s: %s", session_id, e
)
def get_setup_schema(self) -> Dict[str, Any]:
return {
"name": "Browser Use",
"badge": "paid",
"tag": "Cloud browser with remote execution",
"env_vars": [
{
"key": "BROWSER_USE_API_KEY",
"prompt": "Browser Use API key",
"url": "https://browser-use.com",
},
],
"post_setup": "agent_browser",
}

View file

@ -0,0 +1,16 @@
"""Firecrawl cloud browser plugin — bundled, auto-loaded.
Distinct from ``plugins/web/firecrawl/`` (the web search/extract/crawl
plugin); both share the FIRECRAWL_API_KEY but speak to different endpoints
(``/v2/browser`` here vs ``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl``
over there).
"""
from __future__ import annotations
from plugins.browser.firecrawl.provider import FirecrawlBrowserProvider
def register(ctx) -> None:
"""Register the Firecrawl cloud-browser provider with the plugin context."""
ctx.register_browser_provider(FirecrawlBrowserProvider())

View file

@ -0,0 +1,7 @@
name: browser-firecrawl
version: 1.0.0
description: "Firecrawl (https://firecrawl.dev) cloud browser backend. Requires FIRECRAWL_API_KEY. Distinct from the firecrawl WEB search/extract plugin — the two share an API key but operate on different endpoints."
author: NousResearch
kind: backend
provides_browser_providers:
- firecrawl

View file

@ -0,0 +1,162 @@
"""Firecrawl cloud browser provider — plugin form.
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
ABC introduced in PR #25214). The legacy in-tree module
``tools.browser_providers.firecrawl`` was removed in the same PR; this file
is now the canonical implementation.
This is the cloud-browser path distinct from the firecrawl WEB plugin at
``plugins/web/firecrawl/`` which handles search/extract/crawl on
``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl``. The two plugins share the
``FIRECRAWL_API_KEY`` env var but talk to different endpoints (this one
hits ``/v2/browser``).
Config keys this provider responds to::
browser:
cloud_provider: "firecrawl" # explicit selection only — not in the
# legacy auto-detect walk
Auth env vars::
FIRECRAWL_API_KEY=... # https://firecrawl.dev
FIRECRAWL_API_URL=... # optional override (default https://api.firecrawl.dev)
FIRECRAWL_BROWSER_TTL=... # optional, default 300 seconds
"""
from __future__ import annotations
import logging
import os
import uuid
from typing import Any, Dict
import requests
from agent.browser_provider import BrowserProvider
logger = logging.getLogger(__name__)
_BASE_URL = "https://api.firecrawl.dev"
class FirecrawlBrowserProvider(BrowserProvider):
"""Firecrawl (https://firecrawl.dev) cloud browser backend.
Cloud-browser path only search/extract/crawl live in the separate
``plugins/web/firecrawl/`` plugin.
"""
@property
def name(self) -> str:
return "firecrawl"
@property
def display_name(self) -> str:
return "Firecrawl"
def is_available(self) -> bool:
return bool(os.environ.get("FIRECRAWL_API_KEY"))
# ------------------------------------------------------------------
# Session lifecycle
# ------------------------------------------------------------------
def _api_url(self) -> str:
return os.environ.get("FIRECRAWL_API_URL", _BASE_URL)
def _headers(self) -> Dict[str, str]:
api_key = os.environ.get("FIRECRAWL_API_KEY")
if not api_key:
raise ValueError(
"FIRECRAWL_API_KEY environment variable is required. "
"Get your key at https://firecrawl.dev"
)
return {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}",
}
def create_session(self, task_id: str) -> Dict[str, object]:
ttl = int(os.environ.get("FIRECRAWL_BROWSER_TTL", "300"))
body: Dict[str, object] = {"ttl": ttl}
response = requests.post(
f"{self._api_url()}/v2/browser",
headers=self._headers(),
json=body,
timeout=30,
)
if not response.ok:
raise RuntimeError(
f"Failed to create Firecrawl browser session: "
f"{response.status_code} {response.text}"
)
data = response.json()
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
logger.info("Created Firecrawl browser session %s", session_name)
return {
"session_name": session_name,
"bb_session_id": data["id"],
"cdp_url": data["cdpUrl"],
"features": {"firecrawl": True},
}
def close_session(self, session_id: str) -> bool:
try:
response = requests.delete(
f"{self._api_url()}/v2/browser/{session_id}",
headers=self._headers(),
timeout=10,
)
if response.status_code in {200, 201, 204}:
logger.debug("Successfully closed Firecrawl session %s", session_id)
return True
else:
logger.warning(
"Failed to close Firecrawl session %s: HTTP %s - %s",
session_id,
response.status_code,
response.text[:200],
)
return False
except Exception as e:
logger.error("Exception closing Firecrawl session %s: %s", session_id, e)
return False
def emergency_cleanup(self, session_id: str) -> None:
try:
requests.delete(
f"{self._api_url()}/v2/browser/{session_id}",
headers=self._headers(),
timeout=5,
)
except ValueError:
logger.warning(
"Cannot emergency-cleanup Firecrawl session %s — missing credentials",
session_id,
)
except Exception as e:
logger.debug(
"Emergency cleanup failed for Firecrawl session %s: %s", session_id, e
)
def get_setup_schema(self) -> Dict[str, Any]:
return {
"name": "Firecrawl",
"badge": "paid",
"tag": "Cloud browser with remote execution",
"env_vars": [
{
"key": "FIRECRAWL_API_KEY",
"prompt": "Firecrawl API key",
"url": "https://firecrawl.dev",
},
],
"post_setup": "agent_browser",
}