feat(web): add Brave Search (free tier) and DDGS search providers

Both implement WebSearchProvider via tools/web_providers/ — matching the existing SearXNG pattern (PR #5c906d702). Search-only; pair with any extract provider via web.extract_backend. - tools/web_providers/brave_free.py — Brave Search API (free tier, 2k queries/mo). Uses BRAVE_SEARCH_API_KEY as X-Subscription-Token. - tools/web_providers/ddgs.py — DuckDuckGo via the ddgs Python package. No API key; gated on package importability. - tools/web_tools.py: both backends added to _get_backend() config list and auto-detect chain (trails paid providers), _is_backend_available, web_search_tool dispatch, web_extract_tool + web_crawl_tool search-only refusals, check_web_api_key, and the __main__ diagnostic. Introduces _ddgs_package_importable() helper so tests can monkeypatch a single symbol for the ddgs availability check. - hermes_cli/tools_config.py: picker entries for both providers; ddgs gets a post_setup handler that runs `pip install ddgs`. - hermes_cli/config.py: BRAVE_SEARCH_API_KEY in OPTIONAL_ENV_VARS. - scripts/release.py: AUTHOR_MAP entry for @Abd0r. - tests: 14 new tests (brave-free) + 15 new tests (ddgs) covering provider unit behavior, backend wiring, and search-only refusals. Salvages the brave-free + ddgs portion of PR #19796. Not included: the in-line helpers in web_tools.py (replaced with provider modules to match the shipped architecture), the lynx-based extract path (these backends should refuse extract with a clear error — users pair with a real extract provider), and scripts/start-llama-server.sh (unrelated). Co-authored-by: Abd0r <223003280+Abd0r@users.noreply.github.com>
2026-05-13 03:52:00 +00:00 · 2026-05-07 07:23:03 -07:00 · 2026-05-07 07:23:03 -07:00 · 04193cf71c
commit 04193cf71c
parent cdc0a47dd5
8 changed files with 862 additions and 9 deletions
--- a/tools/web_providers/brave_free.py
+++ b/tools/web_providers/brave_free.py
@ -0,0 +1,130 @@
+"""Brave Search web search provider (free tier).
+
+Brave Search's Data-for-Search API offers a free tier (2,000 queries/mo at the
+time of writing) after signing up at https://brave.com/search/api/.  This
+provider implements ``WebSearchProvider`` only — the Data-for-Search endpoint
+returns search results, it does not extract/crawl arbitrary URLs.
+
+Configuration::
+
+    # ~/.hermes/.env
+    BRAVE_SEARCH_API_KEY=your-subscription-token
+
+    # ~/.hermes/config.yaml
+    web:
+      search_backend: "brave-free"
+      extract_backend: "firecrawl"    # pair with an extract provider if needed
+
+The API uses the ``X-Subscription-Token`` header.  Free-tier keys are rate
+limited (1 qps) and capped at 2k queries/month; see the Brave dashboard for
+current quotas.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any, Dict
+
+from tools.web_providers.base import WebSearchProvider
+
+logger = logging.getLogger(__name__)
+
+_BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
+
+
+class BraveFreeSearchProvider(WebSearchProvider):
+    """Search via the Brave Search API (free tier).
+
+    Requires ``BRAVE_SEARCH_API_KEY`` to be set. The value is passed as the
+    ``X-Subscription-Token`` header. No extract capability — pair with
+    Firecrawl/Tavily/Exa/Parallel when you also need ``web_extract``.
+    """
+
+    def provider_name(self) -> str:
+        """Return the backend identifier for this provider (``"brave-free"``)."""
+        return "brave-free"
+
+    def is_configured(self) -> bool:
+        """Return True when ``BRAVE_SEARCH_API_KEY`` is set to a non-empty value."""
+        return bool(os.getenv("BRAVE_SEARCH_API_KEY", "").strip())
+
+    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
+        """Execute a search against the Brave Search API.
+
+        ``limit`` is clamped to the range 1..20 (Brave's ``count`` cap); the
+        clamped value is used both for the request and for truncating the
+        returned list, so the two can never disagree.
+
+        Returns normalized results::
+
+            {
+                "success": True,
+                "data": {
+                    "web": [
+                        {
+                            "title": str,
+                            "url": str,
+                            "description": str,
+                            "position": int,
+                        },
+                        ...
+                    ]
+                }
+            }
+
+        On failure (missing key, HTTP error status, transport error,
+        unparseable or unexpectedly shaped response) returns
+        ``{"success": False, "error": str}``.
+        """
+        # Function-scope import: httpx is only needed when a search runs.
+        import httpx
+
+        api_key = os.getenv("BRAVE_SEARCH_API_KEY", "").strip()
+        if not api_key:
+            return {"success": False, "error": "BRAVE_SEARCH_API_KEY is not set"}
+
+        # Brave's `count` is capped at 20.  This clamped value is the single
+        # source of truth for "how many results" from here on.
+        count = max(1, min(int(limit), 20))
+
+        try:
+            resp = httpx.get(
+                _BRAVE_ENDPOINT,
+                params={"q": query, "count": count},
+                headers={
+                    "X-Subscription-Token": api_key,
+                    "Accept": "application/json",
+                },
+                timeout=15,
+            )
+            resp.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            logger.warning("Brave Search HTTP error: %s", exc)
+            return {
+                "success": False,
+                "error": f"Brave Search returned HTTP {exc.response.status_code}",
+            }
+        except httpx.RequestError as exc:
+            logger.warning("Brave Search request error: %s", exc)
+            return {"success": False, "error": f"Could not reach Brave Search: {exc}"}
+
+        try:
+            data = resp.json()
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Brave Search response parse error: %s", exc)
+            return {"success": False, "error": "Could not parse Brave Search response as JSON"}
+
+        # Valid JSON is not necessarily a JSON object; report an unexpected
+        # shape as a failure instead of raising AttributeError on `.get`.
+        web_section = data.get("web") if isinstance(data, dict) else None
+        if not isinstance(data, dict) or not isinstance(web_section or {}, dict):
+            logger.warning("Brave Search response has unexpected shape: %s", type(data).__name__)
+            return {"success": False, "error": "Unexpected Brave Search response format"}
+
+        raw_results = (web_section or {}).get("results", []) or []
+        # Truncate with the clamped count, not the raw `limit`: a zero,
+        # negative, or non-int `limit` would otherwise slice incorrectly.
+        truncated = raw_results[:count]
+
+        web_results = [
+            {
+                "title": str(r.get("title", "")),
+                "url": str(r.get("url", "")),
+                "description": str(r.get("description", "")),
+                "position": i + 1,
+            }
+            for i, r in enumerate(truncated)
+        ]
+
+        logger.info(
+            "Brave Search '%s': %d results (from %d raw, limit %d)",
+            query,
+            len(web_results),
+            len(raw_results),
+            count,
+        )
+
+        return {"success": True, "data": {"web": web_results}}
--- a/tools/web_providers/ddgs.py
+++ b/tools/web_providers/ddgs.py
@ -0,0 +1,98 @@
+"""DuckDuckGo web search provider via the ``ddgs`` Python package.
+
+DuckDuckGo does not provide an official programmatic search API.  The
+community-maintained `ddgs <https://pypi.org/project/ddgs/>`_ package (the
+renamed successor of ``duckduckgo-search``) scrapes DuckDuckGo's HTML results
+page and normalizes the results.  This provider implements
+``WebSearchProvider`` only — there is no extract capability.
+
+Configuration::
+
+    # No API key required. Enable by installing the package and pointing the
+    # web backend at ddgs:
+    pip install ddgs
+
+    # ~/.hermes/config.yaml
+    web:
+      search_backend: "ddgs"
+      extract_backend: "firecrawl"    # pair with an extract provider if needed
+
+Rate limits are enforced server-side by DuckDuckGo.  Expect intermittent
+``DuckDuckGoSearchException`` / 202 responses under heavy use; this provider
+surfaces them as ``{"success": False, "error": ...}`` rather than crashing
+the tool call.
+
+See https://duckduckgo.com/?q=duckduckgo+tos for terms of use.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict
+
+from tools.web_providers.base import WebSearchProvider
+
+logger = logging.getLogger(__name__)
+
+
+class DDGSSearchProvider(WebSearchProvider):
+    """DuckDuckGo search backend built on the ``ddgs`` package.
+
+    There is no API key to manage: the provider reports itself as configured
+    whenever the ``ddgs`` package can be imported.
+    """
+
+    def provider_name(self) -> str:
+        """Return the backend identifier for this provider (``"ddgs"``)."""
+        return "ddgs"
+
+    def is_configured(self) -> bool:
+        """Report whether the ``ddgs`` package can be imported.
+
+        Called at tool-registration time; must not perform network I/O.
+        """
+        try:
+            import ddgs  # noqa: F401
+        except ImportError:
+            return False
+        return True
+
+    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
+        """Run a DuckDuckGo text search and normalize the hits.
+
+        Success yields ``{"success": True, "data": {"web": [...]}}`` where
+        each entry carries ``title``, ``url``, ``description`` and a 1-based
+        ``position``.  Any failure (package missing, or any exception raised
+        while searching) yields ``{"success": False, "error": str}``.
+        """
+        try:
+            from ddgs import DDGS  # type: ignore
+        except ImportError:
+            install_hint = "ddgs package is not installed — run `pip install ddgs`"
+            return {"success": False, "error": install_hint}
+
+        # `max_results` is only a hint to the package, so the same cap is
+        # enforced again while iterating.
+        cap = max(1, int(limit))
+        collected = []
+
+        try:
+            with DDGS() as session:
+                hits = session.text(query, max_results=cap)
+                for position, entry in enumerate(hits, start=1):
+                    if position > cap:
+                        break
+                    # Prefer "href"; fall back to "url" when it is absent/empty.
+                    link = str(entry.get("href") or entry.get("url") or "")
+                    normalized = {
+                        "title": str(entry.get("title", "")),
+                        "url": link,
+                        "description": str(entry.get("body", "")),
+                        "position": position,
+                    }
+                    collected.append(normalized)
+        except Exception as exc:  # noqa: BLE001 — ddgs raises its own exceptions
+            logger.warning("DDGS search error: %s", exc)
+            return {"success": False, "error": f"DuckDuckGo search failed: {exc}"}
+
+        logger.info("DDGS search '%s': %d results (limit %d)", query, len(collected), limit)
+        return {"success": True, "data": {"web": collected}}