mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
Both providers (brave-free and ddgs) implement WebSearchProvider via
tools/web_providers/ — matching the existing SearXNG pattern (PR #5c906d702).
Both are search-only; pair either with any extract provider via web.extract_backend.
- tools/web_providers/brave_free.py — Brave Search API (free tier, 2k
queries/mo). Uses BRAVE_SEARCH_API_KEY as X-Subscription-Token.
- tools/web_providers/ddgs.py — DuckDuckGo via the ddgs Python package.
No API key; gated on package importability.
- tools/web_tools.py: both backends added to _get_backend() config list
and auto-detect chain (trails paid providers), _is_backend_available,
web_search_tool dispatch, web_extract_tool + web_crawl_tool search-only
refusals, check_web_api_key, and the __main__ diagnostic. Introduces
_ddgs_package_importable() helper so tests can monkeypatch a single
symbol for the ddgs availability check.
- hermes_cli/tools_config.py: picker entries for both providers; ddgs
gets a post_setup handler that runs `pip install ddgs`.
- hermes_cli/config.py: BRAVE_SEARCH_API_KEY in OPTIONAL_ENV_VARS.
- scripts/release.py: AUTHOR_MAP entry for @Abd0r.
- tests: 14 new tests (brave-free) + 15 new tests (ddgs) covering
provider unit behavior, backend wiring, and search-only refusals.
Salvages the brave-free + ddgs portion of PR #19796. Not included: the
in-line helpers in web_tools.py (replaced with provider modules to match
the shipped architecture), the lynx-based extract path (these backends
should refuse extract with a clear error — users pair with a real
extract provider), and scripts/start-llama-server.sh (unrelated).
Co-authored-by: Abd0r <223003280+Abd0r@users.noreply.github.com>
130 lines
4.2 KiB
Python
130 lines
4.2 KiB
Python
"""Brave Search web search provider (free tier).
|
|
|
|
Brave Search's Data-for-Search API offers a free tier (2,000 queries/mo at the
|
|
time of writing) after signing up at https://brave.com/search/api/. This
|
|
provider implements ``WebSearchProvider`` only — the Data-for-Search endpoint
|
|
returns search results, it does not extract/crawl arbitrary URLs.
|
|
|
|
Configuration::
|
|
|
|
# ~/.hermes/.env
|
|
BRAVE_SEARCH_API_KEY=your-subscription-token
|
|
|
|
# ~/.hermes/config.yaml
|
|
web:
|
|
search_backend: "brave-free"
|
|
extract_backend: "firecrawl" # pair with an extract provider if needed
|
|
|
|
The API uses the ``X-Subscription-Token`` header. Free-tier keys are rate
|
|
limited (1 qps) and capped at 2k queries/month; see the Brave dashboard for
|
|
current quotas.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
from typing import Any, Dict
|
|
|
|
from tools.web_providers.base import WebSearchProvider
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
# URL that ``BraveFreeSearchProvider.search`` issues its GET request against.
_BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
|
|
|
|
|
|
class BraveFreeSearchProvider(WebSearchProvider):
    """Search via the Brave Search API (free tier).

    Requires ``BRAVE_SEARCH_API_KEY`` to be set.  The value is passed as the
    ``X-Subscription-Token`` header.  No extract capability — pair with
    Firecrawl/Tavily/Exa/Parallel when you also need ``web_extract``.
    """

    def provider_name(self) -> str:
        """Return the backend identifier for this provider, ``"brave-free"``."""
        return "brave-free"

    def is_configured(self) -> bool:
        """Return True when ``BRAVE_SEARCH_API_KEY`` is set to a non-empty value."""
        return bool(os.getenv("BRAVE_SEARCH_API_KEY", "").strip())

    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Execute a search against the Brave Search API.

        Args:
            query: Search string, sent as the ``q`` request parameter.
            limit: Desired number of results.  The value is clamped to the
                range 1..20 and the clamped value is used both for the
                ``count`` request parameter and for truncating the response,
                so the two can never disagree.

        Returns:
            On success, normalized results::

                {
                    "success": True,
                    "data": {
                        "web": [
                            {
                                "title": str,
                                "url": str,
                                "description": str,
                                "position": int,
                            },
                            ...
                        ]
                    }
                }

            On failure, ``{"success": False, "error": str}``.  Failures
            covered: missing API key, a ``limit`` that cannot be converted to
            an integer, HTTP error status, transport errors, and a response
            body that is not a JSON object.
        """
        # Imported lazily so that importing this module does not require httpx.
        import httpx

        api_key = os.getenv("BRAVE_SEARCH_API_KEY", "").strip()
        if not api_key:
            return {"success": False, "error": "BRAVE_SEARCH_API_KEY is not set"}

        # Report an unusable limit through the error dict rather than raising,
        # matching how every other failure in this method is surfaced.
        try:
            requested = int(limit)
        except (TypeError, ValueError):
            return {"success": False, "error": f"Invalid limit: {limit!r}"}

        # Brave's `count` is capped at 20.
        count = max(1, min(requested, 20))

        try:
            resp = httpx.get(
                _BRAVE_ENDPOINT,
                params={"q": query, "count": count},
                headers={
                    "X-Subscription-Token": api_key,
                    "Accept": "application/json",
                },
                timeout=15,
            )
            resp.raise_for_status()
        except httpx.HTTPStatusError as exc:
            logger.warning("Brave Search HTTP error: %s", exc)
            return {
                "success": False,
                "error": f"Brave Search returned HTTP {exc.response.status_code}",
            }
        except httpx.RequestError as exc:
            logger.warning("Brave Search request error: %s", exc)
            return {"success": False, "error": f"Could not reach Brave Search: {exc}"}

        try:
            data = resp.json()
        except Exception as exc:  # noqa: BLE001
            logger.warning("Brave Search response parse error: %s", exc)
            return {"success": False, "error": "Could not parse Brave Search response as JSON"}

        # Valid JSON is not necessarily an object; guard before calling .get().
        if not isinstance(data, dict):
            logger.warning(
                "Brave Search returned unexpected JSON type: %s", type(data).__name__
            )
            return {"success": False, "error": "Unexpected Brave Search response format"}

        web_section = data.get("web")
        raw_results = web_section.get("results") if isinstance(web_section, dict) else None
        if not isinstance(raw_results, list):
            raw_results = []

        # Truncate with the clamped `count`, not the raw `limit`: a negative
        # limit would otherwise slice results off the tail, and a non-int
        # limit would raise at the slice.  Non-dict entries are skipped.
        kept = [entry for entry in raw_results if isinstance(entry, dict)][:count]

        # `or ""` maps JSON null to an empty string instead of the text "None".
        web_results = [
            {
                "title": str(entry.get("title") or ""),
                "url": str(entry.get("url") or ""),
                "description": str(entry.get("description") or ""),
                "position": position,
            }
            for position, entry in enumerate(kept, start=1)
        ]

        logger.info(
            "Brave Search '%s': %d results (from %d raw, limit %d)",
            query,
            len(web_results),
            len(raw_results),
            requested,
        )

        return {"success": True, "data": {"web": web_results}}
|