mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(tools): add Firecrawl cloud browser provider (#5628)
* feat(tools): add Firecrawl cloud browser provider Adds Firecrawl (https://firecrawl.dev) as a cloud browser provider alongside Browserbase and Browser Use. All browser tools route through Firecrawl's cloud browser via CDP when selected. - tools/browser_providers/firecrawl.py — FirecrawlProvider - tools/browser_tool.py — register in _PROVIDER_REGISTRY - hermes_cli/tools_config.py — add to onboarding provider picker - hermes_cli/setup.py — add to setup summary - hermes_cli/config.py — add FIRECRAWL_BROWSER_TTL config - website/docs/ — browser docs and env var reference Based on #4490 by @developersdigest. Co-Authored-By: Developers Digest <124798203+developersdigest@users.noreply.github.com> * refactor: simplify FirecrawlProvider.emergency_cleanup Use self._headers() and self._api_url() instead of duplicating env-var reads and header construction. * fix: recognize Firecrawl in subscription browser detection _resolve_browser_feature_state() now handles "firecrawl" as a direct browser provider (same pattern as "browser-use"), so hermes setup summary correctly shows "Browser Automation (Firecrawl)" instead of misreporting as "Local browser". Also fixes test_config_version_unchanged assertion (11 → 12). --------- Co-authored-by: Developers Digest <124798203+developersdigest@users.noreply.github.com>
This commit is contained in:
parent
150f70f821
commit
7b129636f0
9 changed files with 169 additions and 9 deletions
|
|
@ -868,6 +868,13 @@ OPTIONAL_ENV_VARS = {
|
|||
"password": True,
|
||||
"category": "tool",
|
||||
},
|
||||
"FIRECRAWL_BROWSER_TTL": {
|
||||
"description": "Firecrawl browser session TTL in seconds (optional, default 300)",
|
||||
"prompt": "Browser session TTL (seconds)",
|
||||
"tools": ["browser_navigate", "browser_click"],
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
},
|
||||
"CAMOFOX_URL": {
|
||||
"description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)",
|
||||
"prompt": "Camofox server URL",
|
||||
|
|
|
|||
|
|
@ -131,6 +131,7 @@ def _browser_label(current_provider: str) -> str:
|
|||
mapping = {
|
||||
"browserbase": "Browserbase",
|
||||
"browser-use": "Browser Use",
|
||||
"firecrawl": "Firecrawl",
|
||||
"camofox": "Camofox",
|
||||
"local": "Local browser",
|
||||
}
|
||||
|
|
@ -156,6 +157,7 @@ def _resolve_browser_feature_state(
|
|||
direct_camofox: bool,
|
||||
direct_browserbase: bool,
|
||||
direct_browser_use: bool,
|
||||
direct_firecrawl: bool,
|
||||
managed_browser_available: bool,
|
||||
) -> tuple[str, bool, bool, bool]:
|
||||
"""Resolve browser availability using the same precedence as runtime."""
|
||||
|
|
@ -179,6 +181,10 @@ def _resolve_browser_feature_state(
|
|||
available = bool(browser_local_available and direct_browser_use)
|
||||
active = bool(browser_tool_enabled and available)
|
||||
return current_provider, available, active, False
|
||||
if current_provider == "firecrawl":
|
||||
available = bool(browser_local_available and direct_firecrawl)
|
||||
active = bool(browser_tool_enabled and available)
|
||||
return current_provider, available, active, False
|
||||
if current_provider == "camofox":
|
||||
return current_provider, False, False, False
|
||||
|
||||
|
|
@ -315,6 +321,7 @@ def get_nous_subscription_features(
|
|||
direct_camofox=direct_camofox,
|
||||
direct_browserbase=direct_browserbase,
|
||||
direct_browser_use=direct_browser_use,
|
||||
direct_firecrawl=direct_firecrawl,
|
||||
managed_browser_available=managed_browser_available,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -657,7 +657,7 @@ def _print_setup_summary(config: dict, hermes_home):
|
|||
else:
|
||||
tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY/FIRECRAWL_API_URL, or TAVILY_API_KEY"))
|
||||
|
||||
# Browser tools (local Chromium, Camofox, Browserbase, or Browser Use)
|
||||
# Browser tools (local Chromium, Camofox, Browserbase, Browser Use, or Firecrawl)
|
||||
browser_provider = subscription_features.browser.current_provider
|
||||
if subscription_features.browser.managed_by_nous:
|
||||
tool_status.append(("Browser Automation (Nous Browserbase)", True, None))
|
||||
|
|
|
|||
|
|
@ -315,6 +315,15 @@ TOOL_CATEGORIES = {
|
|||
"browser_provider": "browser-use",
|
||||
"post_setup": "browserbase",
|
||||
},
|
||||
{
|
||||
"name": "Firecrawl",
|
||||
"tag": "Cloud browser with remote execution",
|
||||
"env_vars": [
|
||||
{"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
|
||||
],
|
||||
"browser_provider": "firecrawl",
|
||||
"post_setup": "browserbase",
|
||||
},
|
||||
{
|
||||
"name": "Camofox",
|
||||
"tag": "Local anti-detection browser (Firefox/Camoufox)",
|
||||
|
|
|
|||
|
|
@ -63,4 +63,4 @@ class TestCamofoxConfigDefaults:
|
|||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
# managed_persistence is auto-merged by _deep_merge, no version bump needed
|
||||
assert DEFAULT_CONFIG["_config_version"] == 11
|
||||
assert DEFAULT_CONFIG["_config_version"] == 12
|
||||
|
|
|
|||
107
tools/browser_providers/firecrawl.py
Normal file
107
tools/browser_providers/firecrawl.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
"""Firecrawl cloud browser provider."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from typing import Dict
|
||||
|
||||
import requests
|
||||
|
||||
from tools.browser_providers.base import CloudBrowserProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BASE_URL = "https://api.firecrawl.dev"
|
||||
|
||||
|
||||
class FirecrawlProvider(CloudBrowserProvider):
    """Firecrawl (https://firecrawl.dev) cloud browser backend.

    Browser sessions are created with ``POST {api_url}/v2/browser`` and
    destroyed with ``DELETE {api_url}/v2/browser/{session_id}``.

    Environment variables read by this class:

    * ``FIRECRAWL_API_KEY`` -- required; sent as a bearer token.
    * ``FIRECRAWL_API_URL`` -- optional API base URL; falls back to
      ``_BASE_URL`` when unset or blank.
    * ``FIRECRAWL_BROWSER_TTL`` -- optional session TTL in seconds; falls
      back to ``_DEFAULT_TTL_SECONDS`` when unset, blank, or invalid.
    """

    # TTL (seconds) used when FIRECRAWL_BROWSER_TTL is unset or unusable.
    _DEFAULT_TTL_SECONDS = 300

    def provider_name(self) -> str:
        """Return the human-readable provider name."""
        return "Firecrawl"

    def is_configured(self) -> bool:
        """Return True when ``FIRECRAWL_API_KEY`` is set to a non-empty value."""
        return bool(os.environ.get("FIRECRAWL_API_KEY"))

    # ------------------------------------------------------------------
    # Session lifecycle
    # ------------------------------------------------------------------

    def _api_url(self) -> str:
        """Return the API base URL without a trailing slash.

        A blank ``FIRECRAWL_API_URL`` (e.g. ``FIRECRAWL_API_URL=`` in a .env
        file) is treated like an unset one; otherwise the request URL would
        be the schemeless path ``/v2/browser``.
        """
        configured = (os.environ.get("FIRECRAWL_API_URL") or "").strip()
        # Strip trailing slashes so "http://host/" + "/v2/browser" does not
        # become "http://host//v2/browser".
        return (configured or _BASE_URL).rstrip("/")

    def _headers(self) -> Dict[str, str]:
        """Build the JSON + bearer-auth request headers.

        Raises:
            ValueError: if ``FIRECRAWL_API_KEY`` is unset or empty.
        """
        api_key = os.environ.get("FIRECRAWL_API_KEY")
        if not api_key:
            raise ValueError(
                "FIRECRAWL_API_KEY environment variable is required. "
                "Get your key at https://firecrawl.dev"
            )
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

    def _session_ttl(self) -> int:
        """Return the session TTL in seconds from ``FIRECRAWL_BROWSER_TTL``.

        Unset or blank values yield the default silently.  Non-integer or
        non-positive values yield the default with a warning instead of
        aborting session creation.
        """
        raw = (os.environ.get("FIRECRAWL_BROWSER_TTL") or "").strip()
        if not raw:
            return self._DEFAULT_TTL_SECONDS
        try:
            ttl = int(raw)
        except ValueError:
            ttl = 0  # handled by the non-positive check below
        if ttl <= 0:
            logger.warning(
                "Ignoring invalid FIRECRAWL_BROWSER_TTL=%r; using default of %d seconds",
                raw,
                self._DEFAULT_TTL_SECONDS,
            )
            return self._DEFAULT_TTL_SECONDS
        return ttl

    def create_session(self, task_id: str) -> Dict[str, object]:
        """Create a Firecrawl browser session for *task_id*.

        Returns:
            A dict with ``session_name`` (locally generated, unique per call),
            ``bb_session_id`` (the ``id`` from the API response),
            ``cdp_url`` (the ``cdpUrl`` from the API response) and
            ``features``.
            NOTE(review): the ``bb_session_id`` key name presumably mirrors
            the other providers' return shape -- confirm against the caller.

        Raises:
            ValueError: if ``FIRECRAWL_API_KEY`` is missing.
            RuntimeError: if the API returns a non-success status, a body
                that is not JSON, or a body without ``id`` / ``cdpUrl``.
        """
        body: Dict[str, object] = {"ttl": self._session_ttl()}

        response = requests.post(
            f"{self._api_url()}/v2/browser",
            headers=self._headers(),
            json=body,
            timeout=30,
        )

        if not response.ok:
            raise RuntimeError(
                f"Failed to create Firecrawl browser session: "
                f"{response.status_code} {response.text}"
            )

        # A 2xx status does not guarantee a usable payload; surface malformed
        # responses as RuntimeError rather than a bare KeyError/decode error.
        try:
            data = response.json()
        except ValueError as err:
            raise RuntimeError(
                "Failed to create Firecrawl browser session: "
                f"response was not valid JSON: {response.text[:200]}"
            ) from err

        if not isinstance(data, dict) or not data.get("id") or not data.get("cdpUrl"):
            raise RuntimeError(
                "Failed to create Firecrawl browser session: "
                f"response is missing 'id' or 'cdpUrl': {response.text[:200]}"
            )

        session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"

        logger.info("Created Firecrawl browser session %s", session_name)

        return {
            "session_name": session_name,
            "bb_session_id": data["id"],
            "cdp_url": data["cdpUrl"],
            "features": {"firecrawl": True},
        }

    def close_session(self, session_id: str) -> bool:
        """Delete the remote session; return True on HTTP 200/201/204.

        Never raises: any exception (including missing credentials) is
        logged and reported as ``False``.
        """
        try:
            response = requests.delete(
                f"{self._api_url()}/v2/browser/{session_id}",
                headers=self._headers(),
                timeout=10,
            )
        except Exception as e:
            logger.error("Exception closing Firecrawl session %s: %s", session_id, e)
            return False

        if response.status_code in (200, 201, 204):
            logger.debug("Successfully closed Firecrawl session %s", session_id)
            return True

        logger.warning(
            "Failed to close Firecrawl session %s: HTTP %s - %s",
            session_id,
            response.status_code,
            response.text[:200],
        )
        return False

    def emergency_cleanup(self, session_id: str) -> None:
        """Best-effort delete of *session_id*; never raises.

        The response status is intentionally not inspected.
        """
        # Resolve credentials separately from the HTTP call: requests' URL
        # errors (MissingSchema, InvalidURL, ...) also subclass ValueError and
        # must not be reported as missing credentials.
        try:
            headers = self._headers()
        except ValueError:
            logger.warning("Cannot emergency-cleanup Firecrawl session %s — missing credentials", session_id)
            return

        try:
            requests.delete(
                f"{self._api_url()}/v2/browser/{session_id}",
                headers=headers,
                timeout=5,
            )
        except Exception as e:
            logger.debug("Emergency cleanup failed for Firecrawl session %s: %s", session_id, e)
|
||||
|
|
@ -79,6 +79,7 @@ except Exception:
|
|||
from tools.browser_providers.base import CloudBrowserProvider
|
||||
from tools.browser_providers.browserbase import BrowserbaseProvider
|
||||
from tools.browser_providers.browser_use import BrowserUseProvider
|
||||
from tools.browser_providers.firecrawl import FirecrawlProvider
|
||||
from tools.tool_backend_helpers import normalize_browser_cloud_provider
|
||||
|
||||
# Camofox local anti-detection browser backend (optional).
|
||||
|
|
@ -235,6 +236,7 @@ def _get_cdp_override() -> str:
|
|||
_PROVIDER_REGISTRY: Dict[str, type] = {
|
||||
"browserbase": BrowserbaseProvider,
|
||||
"browser-use": BrowserUseProvider,
|
||||
"firecrawl": FirecrawlProvider,
|
||||
}
|
||||
|
||||
_cached_cloud_provider: Optional[CloudBrowserProvider] = None
|
||||
|
|
@ -2036,12 +2038,12 @@ def check_browser_requirements() -> bool:
|
|||
"""
|
||||
Check if browser tool requirements are met.
|
||||
|
||||
In **local mode** (no Browserbase credentials): only the ``agent-browser``
|
||||
CLI must be findable.
|
||||
In **local mode** (no cloud provider configured): only the
|
||||
``agent-browser`` CLI must be findable.
|
||||
|
||||
In **cloud mode** (Browserbase, Browser Use, or Firecrawl): the CLI
|
||||
*and* the provider's required credentials must be present.
|
||||
|
||||
In **cloud mode** (BROWSERBASE_API_KEY set): the CLI *and* both
|
||||
``BROWSERBASE_API_KEY`` / ``BROWSERBASE_PROJECT_ID`` must be present.
|
||||
|
||||
Returns:
|
||||
True if all requirements are met, False otherwise
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -77,13 +77,14 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
|||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `PARALLEL_API_KEY` | AI-native web search ([parallel.ai](https://parallel.ai/)) |
|
||||
| `FIRECRAWL_API_KEY` | Web scraping ([firecrawl.dev](https://firecrawl.dev/)) |
|
||||
| `FIRECRAWL_API_KEY` | Web scraping and cloud browser ([firecrawl.dev](https://firecrawl.dev/)) |
|
||||
| `FIRECRAWL_API_URL` | Custom Firecrawl API endpoint for self-hosted instances (optional) |
|
||||
| `TAVILY_API_KEY` | Tavily API key for AI-native web search, extract, and crawl ([app.tavily.com](https://app.tavily.com/home)) |
|
||||
| `EXA_API_KEY` | Exa API key for AI-native web search and contents ([exa.ai](https://exa.ai/)) |
|
||||
| `BROWSERBASE_API_KEY` | Browser automation ([browserbase.com](https://browserbase.com/)) |
|
||||
| `BROWSERBASE_PROJECT_ID` | Browserbase project ID |
|
||||
| `BROWSER_USE_API_KEY` | Browser Use cloud browser API key ([browser-use.com](https://browser-use.com/)) |
|
||||
| `FIRECRAWL_BROWSER_TTL` | Firecrawl browser session TTL in seconds (default: 300) |
|
||||
| `BROWSER_CDP_URL` | Chrome DevTools Protocol URL for local browser (set via `/browser connect`, e.g. `ws://localhost:9222`) |
|
||||
| `CAMOFOX_URL` | Camofox local anti-detection browser URL (default: `http://localhost:9377`) |
|
||||
| `BROWSER_INACTIVITY_TIMEOUT` | Browser session inactivity timeout in seconds |
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ Hermes Agent includes a full browser automation toolset with multiple backend op
|
|||
|
||||
- **Browserbase cloud mode** via [Browserbase](https://browserbase.com) for managed cloud browsers and anti-bot tooling
|
||||
- **Browser Use cloud mode** via [Browser Use](https://browser-use.com) as an alternative cloud browser provider
|
||||
- **Firecrawl cloud mode** via [Firecrawl](https://firecrawl.dev) for cloud browsers with built-in scraping
|
||||
- **Camofox local mode** via [Camofox](https://github.com/jo-inc/camofox-browser) for local anti-detection browsing (Firefox-based fingerprint spoofing)
|
||||
- **Local Chrome via CDP** — connect browser tools to your own Chrome instance using `/browser connect`
|
||||
- **Local browser mode** via the `agent-browser` CLI and a local Chromium installation
|
||||
|
|
@ -23,7 +24,7 @@ Pages are represented as **accessibility trees** (text-based snapshots), making
|
|||
|
||||
Key capabilities:
|
||||
|
||||
- **Multi-provider cloud execution** — Browserbase or Browser Use, no local browser needed
|
||||
- **Multi-provider cloud execution** — Browserbase, Browser Use, or Firecrawl — no local browser needed
|
||||
- **Local Chrome integration** — attach to your running Chrome via CDP for hands-on browsing
|
||||
- **Built-in stealth** — random fingerprints, CAPTCHA solving, residential proxies (Browserbase)
|
||||
- **Session isolation** — each task gets its own browser session
|
||||
|
|
@ -55,6 +56,32 @@ BROWSER_USE_API_KEY=***
|
|||
|
||||
Get your API key at [browser-use.com](https://browser-use.com). Browser Use provides a cloud browser via its REST API. If both Browserbase and Browser Use credentials are set, Browserbase takes priority.
|
||||
|
||||
### Firecrawl cloud mode
|
||||
|
||||
To use Firecrawl as your cloud browser provider, add:
|
||||
|
||||
```bash
|
||||
# Add to ~/.hermes/.env
|
||||
FIRECRAWL_API_KEY=fc-***
|
||||
```
|
||||
|
||||
Get your API key at [firecrawl.dev](https://firecrawl.dev). Then select Firecrawl as your browser provider:
|
||||
|
||||
```bash
|
||||
hermes setup tools
|
||||
# → Browser Automation → Firecrawl
|
||||
```
|
||||
|
||||
Optional settings:
|
||||
|
||||
```bash
|
||||
# Self-hosted Firecrawl instance (default: https://api.firecrawl.dev)
|
||||
FIRECRAWL_API_URL=http://localhost:3002
|
||||
|
||||
# Session TTL in seconds (default: 300)
|
||||
FIRECRAWL_BROWSER_TTL=600
|
||||
```
|
||||
|
||||
### Camofox local mode
|
||||
|
||||
[Camofox](https://github.com/jo-inc/camofox-browser) is a self-hosted Node.js server wrapping Camoufox (a Firefox fork with C++ fingerprint spoofing). It provides local anti-detection browsing without cloud dependencies.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue