feat(web): add Brave Search (free tier) and DDGS search providers

Both implement WebSearchProvider via tools/web_providers/ — matching the
existing SearXNG pattern (PR #5c906d702). Search-only; pair with any
extract provider via web.extract_backend.

- tools/web_providers/brave_free.py — Brave Search API (free tier, 2k
  queries/mo). Uses BRAVE_SEARCH_API_KEY as X-Subscription-Token.
- tools/web_providers/ddgs.py — DuckDuckGo via the ddgs Python package.
  No API key; gated on package importability.
- tools/web_tools.py: both backends added to _get_backend() config list
  and auto-detect chain (trails paid providers), _is_backend_available,
  web_search_tool dispatch, web_extract_tool + web_crawl_tool search-only
  refusals, check_web_api_key, and the __main__ diagnostic. Introduces
  _ddgs_package_importable() helper so tests can monkeypatch a single
  symbol for the ddgs availability check.
- hermes_cli/tools_config.py: picker entries for both providers; ddgs
  gets a post_setup handler that runs `pip install ddgs`.
- hermes_cli/config.py: BRAVE_SEARCH_API_KEY in OPTIONAL_ENV_VARS.
- scripts/release.py: AUTHOR_MAP entry for @Abd0r.
- tests: 14 new tests (brave-free) + 15 new tests (ddgs) covering
  provider unit behavior, backend wiring, and search-only refusals.

Salvages the brave-free + ddgs portion of PR #19796. Not included: the
in-line helpers in web_tools.py (replaced with provider modules to match
the shipped architecture), the lynx-based extract path (these backends
should refuse extract with a clear error — users pair with a real
extract provider), and scripts/start-llama-server.sh (unrelated).

Co-authored-by: Abd0r <223003280+Abd0r@users.noreply.github.com>
This commit is contained in:
Abd0r 2026-05-07 07:23:03 -07:00 committed by Teknium
parent cdc0a47dd5
commit 04193cf71c
8 changed files with 862 additions and 9 deletions

View file

@ -126,18 +126,22 @@ def _get_backend() -> str:
keys manually without running setup.
"""
configured = (_load_web_config().get("backend") or "").lower().strip()
if configured in ("parallel", "firecrawl", "tavily", "exa", "searxng"):
if configured in ("parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs"):
return configured
# Fallback for manual / legacy config — pick the highest-priority
# available backend. Firecrawl also counts as available when the managed
# tool gateway is configured for Nous subscribers.
# Free-tier backends (searxng / brave-free / ddgs) trail the paid ones so
# existing paid setups are unaffected.
backend_candidates = (
("firecrawl", _has_env("FIRECRAWL_API_KEY") or _has_env("FIRECRAWL_API_URL") or _is_tool_gateway_ready()),
("parallel", _has_env("PARALLEL_API_KEY")),
("tavily", _has_env("TAVILY_API_KEY")),
("exa", _has_env("EXA_API_KEY")),
("searxng", _has_env("SEARXNG_URL")),
("brave-free", _has_env("BRAVE_SEARCH_API_KEY")),
("ddgs", _ddgs_package_importable()),
)
for backend, available in backend_candidates:
if available:
@ -196,8 +200,27 @@ def _is_backend_available(backend: str) -> bool:
return _has_env("TAVILY_API_KEY")
if backend == "searxng":
return _has_env("SEARXNG_URL")
if backend == "brave-free":
return _has_env("BRAVE_SEARCH_API_KEY")
if backend == "ddgs":
return _ddgs_package_importable()
return False
def _ddgs_package_importable() -> bool:
"""Return True when the ``ddgs`` Python package can be imported.
ddgs is the only backend whose availability is driven by a package
presence rather than an env var / config entry. Wrapped in a helper
so auto-detect and ``_is_backend_available`` share the same check
(and tests can monkeypatch a single symbol).
"""
try:
import ddgs # noqa: F401
return True
except ImportError:
return False
# ─── Firecrawl Client ────────────────────────────────────────────────────────
_firecrawl_client = None
@ -1200,6 +1223,26 @@ def web_search_tool(query: str, limit: int = 5) -> str:
_debug.save()
return result_json
if backend == "brave-free":
from tools.web_providers.brave_free import BraveFreeSearchProvider
response_data = BraveFreeSearchProvider().search(query, limit)
debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
result_json = json.dumps(response_data, indent=2, ensure_ascii=False)
debug_call_data["final_response_size"] = len(result_json)
_debug.log_call("web_search_tool", debug_call_data)
_debug.save()
return result_json
if backend == "ddgs":
from tools.web_providers.ddgs import DDGSSearchProvider
response_data = DDGSSearchProvider().search(query, limit)
debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
result_json = json.dumps(response_data, indent=2, ensure_ascii=False)
debug_call_data["final_response_size"] = len(result_json)
_debug.log_call("web_search_tool", debug_call_data)
_debug.save()
return result_json
if backend == "tavily":
logger.info("Tavily search: '%s' (limit: %d)", query, limit)
raw = _tavily_request("search", {
@ -1350,11 +1393,12 @@ async def web_extract_tool(
"include_images": False,
})
results = _normalize_tavily_documents(raw, fallback_url=safe_urls[0] if safe_urls else "")
elif backend == "searxng":
# SearXNG is search-only — it cannot extract URL content
elif backend in ("searxng", "brave-free", "ddgs"):
# These backends are search-only — they cannot extract URL content
_label = {"searxng": "SearXNG", "brave-free": "Brave Search (free tier)", "ddgs": "DuckDuckGo (ddgs)"}[backend]
return json.dumps({
"success": False,
"error": "SearXNG is a search-only backend and cannot extract URL content. "
"error": f"{_label} is a search-only backend and cannot extract URL content. "
"Set web.extract_backend to firecrawl, tavily, exa, or parallel.",
}, ensure_ascii=False)
else:
@ -1732,10 +1776,11 @@ async def web_crawl_tool(
_debug.save()
return cleaned_result
# SearXNG is search-only — it cannot crawl
if backend == "searxng":
# SearXNG / Brave Search (free tier) / DuckDuckGo (ddgs) are search-only — they cannot crawl
if backend in ("searxng", "brave-free", "ddgs"):
_label = {"searxng": "SearXNG", "brave-free": "Brave Search (free tier)", "ddgs": "DuckDuckGo (ddgs)"}[backend]
return json.dumps({
"error": "SearXNG is a search-only backend and cannot crawl URLs. "
"error": f"{_label} is a search-only backend and cannot crawl URLs. "
"Set FIRECRAWL_API_KEY for crawling, or use web_search instead.",
"success": False,
}, ensure_ascii=False)
@ -2035,9 +2080,12 @@ def check_firecrawl_api_key() -> bool:
def check_web_api_key() -> bool:
"""Check whether the configured web backend is available."""
configured = _load_web_config().get("backend", "").lower().strip()
if configured in ("exa", "parallel", "firecrawl", "tavily", "searxng"):
if configured in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs"):
return _is_backend_available(configured)
return any(_is_backend_available(backend) for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng"))
return any(
_is_backend_available(backend)
for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs")
)
def check_auxiliary_model() -> bool:
@ -2074,6 +2122,10 @@ if __name__ == "__main__":
print(" Using Tavily API (https://tavily.com)")
elif backend == "searxng":
print(f" Using SearXNG (search only): {os.getenv('SEARXNG_URL', '').strip()}")
elif backend == "brave-free":
print(" Using Brave Search free tier (search only)")
elif backend == "ddgs":
print(" Using DuckDuckGo via ddgs package (search only)")
else:
if firecrawl_url_available:
print(f" Using self-hosted Firecrawl: {os.getenv('FIRECRAWL_API_URL').strip().rstrip('/')}")