From a15cdfb0509db31b094aa0ff034b2432c43bc6e1 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Thu, 14 May 2026 14:11:48 +0530 Subject: [PATCH] feat(browser): browser-use + firecrawl plugins; drop single-eligible shortcut MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrates the remaining two cloud browser providers to plugins: plugins/browser/browser_use/ — dual auth (direct BROWSER_USE_API_KEY or managed Nous gateway), idempotency-key handling for retried managed-mode creates, x-external-call-id capture. plugins/browser/firecrawl/ — direct FIRECRAWL_API_KEY only; distinct from plugins/web/firecrawl/ (same key, different endpoint). Also drops the 'single-eligible shortcut' rule from agent.browser_registry._resolve(). The rule was a copy-paste from web_search_registry that would have introduced a real behavior change: a user with only FIRECRAWL_API_KEY set (for web-extract) would silently get routed to a paid Firecrawl cloud browser on a fresh install — not matching origin/main, which only auto-detected between Browser Use and Browserbase. Third-party browser plugins are subject to the same gate: they require explicit `browser.cloud_provider` to take effect. 
Verified end-to-end via plugin discovery: - 3 plugins register (browser-use, browserbase, firecrawl) - _resolve(None) with no creds: None (local mode) - _resolve(None) with only FIRECRAWL_API_KEY: None (matches main) - _resolve('firecrawl'): firecrawl (explicit wins) - _resolve(None) with BU+firecrawl: browser-use (legacy walk first hit) - _resolve(None) with all three: browser-use (legacy walk order) --- agent/browser_registry.py | 31 ++- plugins/browser/browser_use/__init__.py | 14 ++ plugins/browser/browser_use/plugin.yaml | 7 + plugins/browser/browser_use/provider.py | 305 ++++++++++++++++++++++++ plugins/browser/firecrawl/__init__.py | 16 ++ plugins/browser/firecrawl/plugin.yaml | 7 + plugins/browser/firecrawl/provider.py | 162 +++++++++++++ 7 files changed, 530 insertions(+), 12 deletions(-) create mode 100644 plugins/browser/browser_use/__init__.py create mode 100644 plugins/browser/browser_use/plugin.yaml create mode 100644 plugins/browser/browser_use/provider.py create mode 100644 plugins/browser/firecrawl/__init__.py create mode 100644 plugins/browser/firecrawl/plugin.yaml create mode 100644 plugins/browser/firecrawl/provider.py diff --git a/agent/browser_registry.py b/agent/browser_registry.py index 249c4863927..7b5b8b99b5f 100644 --- a/agent/browser_registry.py +++ b/agent/browser_registry.py @@ -12,8 +12,7 @@ Active selection The active provider is chosen by configuration with this precedence: 1. ``browser.cloud_provider`` in ``config.yaml`` (explicit override). -2. If exactly one registered provider is available, use it. -3. Legacy preference order — ``browser-use`` → ``browserbase`` — filtered by +2. Legacy preference order — ``browser-use`` → ``browserbase`` — filtered by availability. 
Matches the historic auto-detect order in :func:`tools.browser_tool._get_cloud_provider` (Browser Use checked first because it covers both the managed Nous gateway and direct API key path; @@ -22,7 +21,7 @@ The active provider is chosen by configuration with this precedence: cloud browser when they explicitly set ``browser.cloud_provider: firecrawl``, matching pre-migration behaviour where Firecrawl was never auto-selected. -4. Otherwise ``None`` — the dispatcher falls back to local browser mode. +3. Otherwise ``None`` — the dispatcher falls back to local browser mode. The explicit-config branch (rule 1) intentionally ignores ``is_available()`` so the dispatcher surfaces a typed "X_API_KEY is not set" error to the user @@ -132,12 +131,22 @@ def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]: :meth:`is_available` returns False — the dispatcher will surface a precise "X_API_KEY is not set" error instead of silently routing somewhere else. - 3. **Single-provider shortcut.** When only one registered provider - reports ``is_available() == True``, return it. - 4. **Legacy preference walk, filtered by availability.** Walk + 3. **Legacy preference walk, filtered by availability.** Walk :data:`_LEGACY_PREFERENCE` (``browser-use`` → ``browserbase``) looking for a provider whose ``is_available()`` is True. + There is intentionally NO "single-eligible shortcut" rule here (unlike + :func:`agent.web_search_registry._resolve`). Pre-migration, the + auto-detect branch in ``tools.browser_tool._get_cloud_provider`` only + considered Browser Use and Browserbase; Firecrawl was reachable only + via an explicit ``browser.cloud_provider: firecrawl`` config key. + Preserving that gate matters because Firecrawl shares its API key with + the *web* extract plugin (``plugins/web/firecrawl/``), so users who set + ``FIRECRAWL_API_KEY`` for web extract must NOT get silently routed to a + paid cloud browser on a fresh install. 
Third-party browser-provider + plugins added under ``~/.hermes/plugins/browser/<name>/`` are subject + to the same gate — they must be explicitly configured to take effect. + Returns None when no provider is configured AND no available provider matches the legacy preference; the dispatcher then falls back to local browser mode. @@ -170,12 +179,10 @@ def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]: configured, ) - # 3. + 4. Auto-detect path — filter by availability so we don't surface - # a provider the user has no credentials for. - eligible = [p for p in snapshot.values() if _is_available_safe(p)] - if len(eligible) == 1: - return eligible[0] - + # 3. Legacy preference walk — only providers in _LEGACY_PREFERENCE are + # auto-eligible. Filtered by availability so we don't surface a + # provider the user has no credentials for. See docstring for why + # we do NOT fall back to "any single-eligible registered provider". for legacy in _LEGACY_PREFERENCE: provider = snapshot.get(legacy) if provider is not None and _is_available_safe(provider): diff --git a/plugins/browser/browser_use/__init__.py b/plugins/browser/browser_use/__init__.py new file mode 100644 index 00000000000..b07db13913a --- /dev/null +++ b/plugins/browser/browser_use/__init__.py @@ -0,0 +1,14 @@ +"""Browser Use cloud browser plugin — bundled, auto-loaded. + +Mirrors the ``plugins/web/<name>/`` layout: ``provider.py`` holds the +provider class; ``__init__.py::register`` instantiates and registers it. 
+""" + +from __future__ import annotations + +from plugins.browser.browser_use.provider import BrowserUseBrowserProvider + + +def register(ctx) -> None: + """Register the Browser Use provider with the plugin context.""" + ctx.register_browser_provider(BrowserUseBrowserProvider()) diff --git a/plugins/browser/browser_use/plugin.yaml b/plugins/browser/browser_use/plugin.yaml new file mode 100644 index 00000000000..ff926a50ea7 --- /dev/null +++ b/plugins/browser/browser_use/plugin.yaml @@ -0,0 +1,7 @@ +name: browser-browser-use +version: 1.0.0 +description: "Browser Use (https://browser-use.com) cloud browser backend. Supports both direct BROWSER_USE_API_KEY and the managed Nous tool gateway. Also powers the 'Nous Subscription' UX flow that bills usage to a Nous subscription." +author: NousResearch +kind: backend +provides_browser_providers: + - browser-use diff --git a/plugins/browser/browser_use/provider.py b/plugins/browser/browser_use/provider.py new file mode 100644 index 00000000000..82bd2420ca1 --- /dev/null +++ b/plugins/browser/browser_use/provider.py @@ -0,0 +1,305 @@ +"""Browser Use cloud browser provider — plugin form. + +Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing +ABC introduced in PR #25214). The legacy in-tree module +``tools.browser_providers.browser_use`` was removed in the same PR; this file +is now the canonical implementation. + +Browser Use is the only browser backend with dual auth: a direct +``BROWSER_USE_API_KEY`` for self-billed users, or the managed Nous tool +gateway (which Hermes uses to bill Browser Use sessions to a Nous +subscription). The dispatch order — direct API key first, managed gateway +second — preserves the pre-migration behaviour in +``tools.browser_providers.browser_use.BrowserUseProvider._get_config_or_none``. 
+ +Config keys this provider responds to:: + + browser: + cloud_provider: "browser-use" # explicit selection + tool_gateway: + browser: "gateway" # optional: prefer managed gateway + # even when BROWSER_USE_API_KEY is set + +Auth env vars (one of):: + + BROWSER_USE_API_KEY=... # https://browser-use.com + # OR a managed Nous gateway entry (configured via 'hermes setup') +""" + +from __future__ import annotations + +import logging +import os +import threading +import uuid +from typing import Any, Dict, Optional + +import requests + +from agent.browser_provider import BrowserProvider + +logger = logging.getLogger(__name__) + +# Idempotency tracking for managed-mode session creation. The managed Nous +# gateway returns 409 "already in progress" on retried POSTs; we forward the +# original idempotency key so the gateway can deduplicate. Cleared on +# success or terminal failure. +_pending_create_keys: Dict[str, str] = {} +_pending_create_keys_lock = threading.Lock() + +_BASE_URL = "https://api.browser-use.com/api/v3" +_DEFAULT_MANAGED_TIMEOUT_MINUTES = 5 +_DEFAULT_MANAGED_PROXY_COUNTRY_CODE = "us" + + +def _get_or_create_pending_create_key(task_id: str) -> str: + with _pending_create_keys_lock: + existing = _pending_create_keys.get(task_id) + if existing: + return existing + + created = f"browser-use-session-create:{uuid.uuid4().hex}" + _pending_create_keys[task_id] = created + return created + + +def _clear_pending_create_key(task_id: str) -> None: + with _pending_create_keys_lock: + _pending_create_keys.pop(task_id, None) + + +def _should_preserve_pending_create_key(response: requests.Response) -> bool: + """Decide whether to keep the idempotency key after a failed create. + + Preserve the key when the failure looks retryable (5xx) OR when the + gateway reports the original request is still in flight (409 "already + in progress") — in either case, retrying with the same key lets the + gateway deduplicate. 
+ + Drop the key on any other 4xx (auth failure, bad request, etc.) — those + won't succeed by being retried. + """ + if response.status_code >= 500: + return True + + if response.status_code != 409: + return False + + try: + payload = response.json() + except Exception: + return False + + if not isinstance(payload, dict): + return False + + error = payload.get("error") + if not isinstance(error, dict): + return False + + message = str(error.get("message") or "").lower() + return "already in progress" in message + + +class BrowserUseBrowserProvider(BrowserProvider): + """Browser Use (https://browser-use.com) cloud browser backend. + + Dual auth: prefers a direct BROWSER_USE_API_KEY when set, falling back + to the managed Nous tool gateway when ``tool_gateway.browser`` config + routes through it. Setting ``tool_gateway.browser: gateway`` flips the + order so managed billing wins even when BROWSER_USE_API_KEY is present. + """ + + @property + def name(self) -> str: + return "browser-use" + + @property + def display_name(self) -> str: + return "Browser Use" + + def is_available(self) -> bool: + return self._get_config_or_none() is not None + + # ------------------------------------------------------------------ + # Config resolution (direct API key OR managed Nous gateway) + # ------------------------------------------------------------------ + + def _get_config_or_none(self) -> Optional[Dict[str, Any]]: + # Import here to avoid a hard dependency at module-import time — + # managed_tool_gateway pulls in the Nous auth stack which can be + # heavy and is not needed for direct-API-key users. + from tools.managed_tool_gateway import resolve_managed_tool_gateway + from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway + + # 1. Direct API key path (unless user explicitly prefers gateway). 
+ api_key = os.environ.get("BROWSER_USE_API_KEY") + if api_key and not prefers_gateway("browser"): + return { + "api_key": api_key, + "base_url": _BASE_URL, + "managed_mode": False, + } + + # 2. Managed Nous gateway path. + managed = resolve_managed_tool_gateway("browser-use") + if managed is None: + return None + + # Hold reference to managed_nous_tools_enabled so static analysis + # doesn't flag the import as unused — the helper is consulted by + # _get_config() below to compose a more accurate error message. + _ = managed_nous_tools_enabled + + return { + "api_key": managed.nous_user_token, + "base_url": managed.gateway_origin.rstrip("/"), + "managed_mode": True, + } + + def _get_config(self) -> Dict[str, Any]: + from tools.tool_backend_helpers import managed_nous_tools_enabled + + config = self._get_config_or_none() + if config is None: + message = ( + "Browser Use requires a direct BROWSER_USE_API_KEY credential." + ) + if managed_nous_tools_enabled(): + message = ( + "Browser Use requires either a direct BROWSER_USE_API_KEY " + "credential or a managed Browser Use gateway configuration." + ) + raise ValueError(message) + return config + + # ------------------------------------------------------------------ + # Session lifecycle + # ------------------------------------------------------------------ + + def _headers(self, config: Dict[str, Any]) -> Dict[str, str]: + return { + "Content-Type": "application/json", + "X-Browser-Use-API-Key": config["api_key"], + } + + def create_session(self, task_id: str) -> Dict[str, object]: + config = self._get_config() + managed_mode = bool(config.get("managed_mode")) + + headers = self._headers(config) + if managed_mode: + headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id) + + # Keep gateway-backed sessions short so billing authorization does not + # default to a long Browser-Use timeout when Hermes only needs a task- + # scoped ephemeral browser. 
+ payload = ( + { + "timeout": _DEFAULT_MANAGED_TIMEOUT_MINUTES, + "proxyCountryCode": _DEFAULT_MANAGED_PROXY_COUNTRY_CODE, + } + if managed_mode + else {} + ) + + response = requests.post( + f"{config['base_url']}/browsers", + headers=headers, + json=payload, + timeout=30, + ) + + if not response.ok: + if managed_mode and not _should_preserve_pending_create_key(response): + _clear_pending_create_key(task_id) + raise RuntimeError( + f"Failed to create Browser Use session: " + f"{response.status_code} {response.text}" + ) + + session_data = response.json() + if managed_mode: + _clear_pending_create_key(task_id) + session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}" + external_call_id = ( + response.headers.get("x-external-call-id") if managed_mode else None + ) + + logger.info("Created Browser Use session %s", session_name) + + cdp_url = session_data.get("cdpUrl") or session_data.get("connectUrl") or "" + + return { + "session_name": session_name, + "bb_session_id": session_data["id"], + "cdp_url": cdp_url, + "features": {"browser_use": True}, + "external_call_id": external_call_id, + } + + def close_session(self, session_id: str) -> bool: + try: + config = self._get_config() + except ValueError: + logger.warning( + "Cannot close Browser Use session %s — missing credentials", session_id + ) + return False + + try: + response = requests.patch( + f"{config['base_url']}/browsers/{session_id}", + headers=self._headers(config), + json={"action": "stop"}, + timeout=10, + ) + if response.status_code in {200, 201, 204}: + logger.debug("Successfully closed Browser Use session %s", session_id) + return True + else: + logger.warning( + "Failed to close Browser Use session %s: HTTP %s - %s", + session_id, + response.status_code, + response.text[:200], + ) + return False + except Exception as e: + logger.error("Exception closing Browser Use session %s: %s", session_id, e) + return False + + def emergency_cleanup(self, session_id: str) -> None: + config = 
self._get_config_or_none() + if config is None: + logger.warning( + "Cannot emergency-cleanup Browser Use session %s — missing credentials", + session_id, + ) + return + try: + requests.patch( + f"{config['base_url']}/browsers/{session_id}", + headers=self._headers(config), + json={"action": "stop"}, + timeout=5, + ) + except Exception as e: + logger.debug( + "Emergency cleanup failed for Browser Use session %s: %s", session_id, e + ) + + def get_setup_schema(self) -> Dict[str, Any]: + return { + "name": "Browser Use", + "badge": "paid", + "tag": "Cloud browser with remote execution", + "env_vars": [ + { + "key": "BROWSER_USE_API_KEY", + "prompt": "Browser Use API key", + "url": "https://browser-use.com", + }, + ], + "post_setup": "agent_browser", + } diff --git a/plugins/browser/firecrawl/__init__.py b/plugins/browser/firecrawl/__init__.py new file mode 100644 index 00000000000..b045b636302 --- /dev/null +++ b/plugins/browser/firecrawl/__init__.py @@ -0,0 +1,16 @@ +"""Firecrawl cloud browser plugin — bundled, auto-loaded. + +Distinct from ``plugins/web/firecrawl/`` (the web search/extract/crawl +plugin); both share the FIRECRAWL_API_KEY but speak to different endpoints +(``/v2/browser`` here vs ``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl`` +over there). +""" + +from __future__ import annotations + +from plugins.browser.firecrawl.provider import FirecrawlBrowserProvider + + +def register(ctx) -> None: + """Register the Firecrawl cloud-browser provider with the plugin context.""" + ctx.register_browser_provider(FirecrawlBrowserProvider()) diff --git a/plugins/browser/firecrawl/plugin.yaml b/plugins/browser/firecrawl/plugin.yaml new file mode 100644 index 00000000000..22da6a7f4b5 --- /dev/null +++ b/plugins/browser/firecrawl/plugin.yaml @@ -0,0 +1,7 @@ +name: browser-firecrawl +version: 1.0.0 +description: "Firecrawl (https://firecrawl.dev) cloud browser backend. Requires FIRECRAWL_API_KEY. 
Distinct from the firecrawl WEB search/extract plugin — the two share an API key but operate on different endpoints." +author: NousResearch +kind: backend +provides_browser_providers: + - firecrawl diff --git a/plugins/browser/firecrawl/provider.py b/plugins/browser/firecrawl/provider.py new file mode 100644 index 00000000000..a3f74d32113 --- /dev/null +++ b/plugins/browser/firecrawl/provider.py @@ -0,0 +1,162 @@ +"""Firecrawl cloud browser provider — plugin form. + +Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing +ABC introduced in PR #25214). The legacy in-tree module +``tools.browser_providers.firecrawl`` was removed in the same PR; this file +is now the canonical implementation. + +This is the cloud-browser path — distinct from the firecrawl WEB plugin at +``plugins/web/firecrawl/`` which handles search/extract/crawl on +``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl``. The two plugins share the +``FIRECRAWL_API_KEY`` env var but talk to different endpoints (this one +hits ``/v2/browser``). + +Config keys this provider responds to:: + + browser: + cloud_provider: "firecrawl" # explicit selection only — not in the + # legacy auto-detect walk + +Auth env vars:: + + FIRECRAWL_API_KEY=... # https://firecrawl.dev + FIRECRAWL_API_URL=... # optional override (default https://api.firecrawl.dev) + FIRECRAWL_BROWSER_TTL=... # optional, default 300 seconds +""" + +from __future__ import annotations + +import logging +import os +import uuid +from typing import Any, Dict + +import requests + +from agent.browser_provider import BrowserProvider + +logger = logging.getLogger(__name__) + +_BASE_URL = "https://api.firecrawl.dev" + + +class FirecrawlBrowserProvider(BrowserProvider): + """Firecrawl (https://firecrawl.dev) cloud browser backend. + + Cloud-browser path only — search/extract/crawl live in the separate + ``plugins/web/firecrawl/`` plugin. 
+ """ + + @property + def name(self) -> str: + return "firecrawl" + + @property + def display_name(self) -> str: + return "Firecrawl" + + def is_available(self) -> bool: + return bool(os.environ.get("FIRECRAWL_API_KEY")) + + # ------------------------------------------------------------------ + # Session lifecycle + # ------------------------------------------------------------------ + + def _api_url(self) -> str: + return os.environ.get("FIRECRAWL_API_URL", _BASE_URL) + + def _headers(self) -> Dict[str, str]: + api_key = os.environ.get("FIRECRAWL_API_KEY") + if not api_key: + raise ValueError( + "FIRECRAWL_API_KEY environment variable is required. " + "Get your key at https://firecrawl.dev" + ) + return { + "Content-Type": "application/json", + "Authorization": f"Bearer {api_key}", + } + + def create_session(self, task_id: str) -> Dict[str, object]: + ttl = int(os.environ.get("FIRECRAWL_BROWSER_TTL", "300")) + + body: Dict[str, object] = {"ttl": ttl} + + response = requests.post( + f"{self._api_url()}/v2/browser", + headers=self._headers(), + json=body, + timeout=30, + ) + + if not response.ok: + raise RuntimeError( + f"Failed to create Firecrawl browser session: " + f"{response.status_code} {response.text}" + ) + + data = response.json() + session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}" + + logger.info("Created Firecrawl browser session %s", session_name) + + return { + "session_name": session_name, + "bb_session_id": data["id"], + "cdp_url": data["cdpUrl"], + "features": {"firecrawl": True}, + } + + def close_session(self, session_id: str) -> bool: + try: + response = requests.delete( + f"{self._api_url()}/v2/browser/{session_id}", + headers=self._headers(), + timeout=10, + ) + if response.status_code in {200, 201, 204}: + logger.debug("Successfully closed Firecrawl session %s", session_id) + return True + else: + logger.warning( + "Failed to close Firecrawl session %s: HTTP %s - %s", + session_id, + response.status_code, + response.text[:200], + ) + 
return False + except Exception as e: + logger.error("Exception closing Firecrawl session %s: %s", session_id, e) + return False + + def emergency_cleanup(self, session_id: str) -> None: + try: + requests.delete( + f"{self._api_url()}/v2/browser/{session_id}", + headers=self._headers(), + timeout=5, + ) + except ValueError: + logger.warning( + "Cannot emergency-cleanup Firecrawl session %s — missing credentials", + session_id, + ) + except Exception as e: + logger.debug( + "Emergency cleanup failed for Firecrawl session %s: %s", session_id, e + ) + + def get_setup_schema(self) -> Dict[str, Any]: + return { + "name": "Firecrawl", + "badge": "paid", + "tag": "Cloud browser with remote execution", + "env_vars": [ + { + "key": "FIRECRAWL_API_KEY", + "prompt": "Firecrawl API key", + "url": "https://firecrawl.dev", + }, + ], + "post_setup": "agent_browser", + }