refactor(web): remove legacy in-tree provider modules

Deletes tools/web_providers/{brave_free,ddgs,searxng}.py — the three providers that moved to plugins/web/ in prior commits. tools/web_tools.py no longer imports them (registry dispatch as of d8735963f), so removing them is purely a cleanup pass.

Also migrates the existing tests to the new import paths:

  tests/tools/test_web_providers_brave_free.py
  tests/tools/test_web_providers_ddgs.py
  tests/tools/test_web_providers_searxng.py

Mechanical rewrites:

- `from tools.web_providers.X import YSearchProvider` -> `from plugins.web.X.provider import YWebSearchProvider`
- `.is_configured()` -> `.is_available()` (legacy method -> new method)
- `.provider_name()` -> `.name` (legacy method -> new property)
- `from tools.web_providers.base import WebSearchProvider` -> `from agent.web_search_provider import WebSearchProvider` (the subclass check now asserts membership in the new plugin-facing ABC)
- the `sys.modules` delitem of `"tools.web_providers.ddgs"` now targets `"plugins.web.ddgs.provider"` (cache-busting for lazy ddgs imports)

The TestXBackendWiring / TestXSearchOnlyErrors classes (covering _is_backend_available, _get_backend, check_web_api_key, and the "search-only" error paths in web_extract/web_crawl) are untouched — those still test web_tools.py's backend-selection logic, which continues to recognize the names "brave-free" / "ddgs" / "searxng" even after the modules behind them moved to plugins.

tools/web_providers/base.py is intentionally NOT deleted by this commit — it's the parent ABC of the legacy modules, and its `WebSearchProvider` class shares a name with agent/web_search_provider.py::WebSearchProvider. Removing it would surface the naming collision (see PR description Finding 0); the real migration PR deletes it in the same commit that drops the _WEB_PLUGIN_SKIPLIST guards in hermes_cli/tools_config.py.
Test results:

  bash scripts/run_tests.sh tests/tools/test_web_providers_*.py
    -> 65 passed in 3.41s (all rewritten unit tests + unchanged integration tests)
  bash scripts/run_tests.sh tests/tools/test_web_*.py
    -> 141 passed in 4.70s (full web test set, post-deletion)
2026-05-18 04:41:56 +00:00 · 2026-05-13 23:52:02 +05:30 · 2026-05-13 23:52:02 +05:30 · 6b219f5af6
commit 6b219f5af6
parent 714630110b
9 changed files with 105 additions and 462 deletions
--- a/tools/web_providers/brave_free.py
+++ b/tools/web_providers/brave_free.py
@ -1,130 +0,0 @@
-"""Brave Search web search provider (free tier).
-
-Brave Search's Data-for-Search API offers a free tier (2,000 queries/mo at the
-time of writing) after signing up at https://brave.com/search/api/.  This
-provider implements ``WebSearchProvider`` only — the Data-for-Search endpoint
-returns search results, it does not extract/crawl arbitrary URLs.
-
-Configuration::
-
-    # ~/.hermes/.env
-    BRAVE_SEARCH_API_KEY=your-subscription-token
-
-    # ~/.hermes/config.yaml
-    web:
-      search_backend: "brave-free"
-      extract_backend: "firecrawl"    # pair with an extract provider if needed
-
-The API uses the ``X-Subscription-Token`` header.  Free-tier keys are rate
-limited (1 qps) and capped at 2k queries/month; see the Brave dashboard for
-current quotas.
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-from typing import Any, Dict
-
-from tools.web_providers.base import WebSearchProvider
-
-logger = logging.getLogger(__name__)
-
-_BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
-
-
-class BraveFreeSearchProvider(WebSearchProvider):
-    """Search via the Brave Search API (free tier).
-
-    Requires ``BRAVE_SEARCH_API_KEY`` to be set. The value is passed as the
-    ``X-Subscription-Token`` header. No extract capability — pair with
-    Firecrawl/Tavily/Exa/Parallel when you also need ``web_extract``.
-    """
-
-    def provider_name(self) -> str:
-        return "brave-free"
-
-    def is_configured(self) -> bool:
-        """Return True when ``BRAVE_SEARCH_API_KEY`` is set to a non-empty value."""
-        return bool(os.getenv("BRAVE_SEARCH_API_KEY", "").strip())
-
-    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
-        """Execute a search against the Brave Search API.
-
-        Returns normalized results::
-
-            {
-                "success": True,
-                "data": {
-                    "web": [
-                        {
-                            "title": str,
-                            "url": str,
-                            "description": str,
-                            "position": int,
-                        },
-                        ...
-                    ]
-                }
-            }
-
-        On failure returns ``{"success": False, "error": str}``.
-        """
-        import httpx
-
-        api_key = os.getenv("BRAVE_SEARCH_API_KEY", "").strip()
-        if not api_key:
-            return {"success": False, "error": "BRAVE_SEARCH_API_KEY is not set"}
-
-        # Brave's `count` is capped at 20.
-        count = max(1, min(int(limit), 20))
-
-        try:
-            resp = httpx.get(
-                _BRAVE_ENDPOINT,
-                params={"q": query, "count": count},
-                headers={
-                    "X-Subscription-Token": api_key,
-                    "Accept": "application/json",
-                },
-                timeout=15,
-            )
-            resp.raise_for_status()
-        except httpx.HTTPStatusError as exc:
-            logger.warning("Brave Search HTTP error: %s", exc)
-            return {
-                "success": False,
-                "error": f"Brave Search returned HTTP {exc.response.status_code}",
-            }
-        except httpx.RequestError as exc:
-            logger.warning("Brave Search request error: %s", exc)
-            return {"success": False, "error": f"Could not reach Brave Search: {exc}"}
-
-        try:
-            data = resp.json()
-        except Exception as exc:  # noqa: BLE001
-            logger.warning("Brave Search response parse error: %s", exc)
-            return {"success": False, "error": "Could not parse Brave Search response as JSON"}
-
-        raw_results = (data.get("web") or {}).get("results", []) or []
-        truncated = raw_results[:limit]
-
-        web_results = [
-            {
-                "title": str(r.get("title", "")),
-                "url": str(r.get("url", "")),
-                "description": str(r.get("description", "")),
-                "position": i + 1,
-            }
-            for i, r in enumerate(truncated)
-        ]
-
-        logger.info(
-            "Brave Search '%s': %d results (from %d raw, limit %d)",
-            query,
-            len(web_results),
-            len(raw_results),
-            limit,
-        )
-
-        return {"success": True, "data": {"web": web_results}}
--- a/tools/web_providers/ddgs.py
+++ b/tools/web_providers/ddgs.py
@ -1,98 +0,0 @@
-"""DuckDuckGo web search provider via the ``ddgs`` Python package.
-
-DuckDuckGo does not provide an official programmatic search API.  The
-community-maintained `ddgs <https://pypi.org/project/ddgs/>`_ package (the
-renamed successor of ``duckduckgo-search``) scrapes DuckDuckGo's HTML results
-page and normalizes them.  It implements ``WebSearchProvider`` only — there is
-no extract capability.
-
-Configuration::
-
-    # No API key required. Enable by installing the package and pointing the
-    # web backend at ddgs:
-    pip install ddgs
-
-    # ~/.hermes/config.yaml
-    web:
-      search_backend: "ddgs"
-      extract_backend: "firecrawl"    # pair with an extract provider if needed
-
-Rate limits are enforced server-side by DuckDuckGo.  Expect intermittent
-``DuckDuckGoSearchException`` / 202 responses under heavy use; this provider
-surfaces them as ``{"success": False, "error": ...}`` rather than crashing
-the tool call.
-
-See https://duckduckgo.com/?q=duckduckgo+tos for terms of use.
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import Any, Dict
-
-from tools.web_providers.base import WebSearchProvider
-
-logger = logging.getLogger(__name__)
-
-
-class DDGSSearchProvider(WebSearchProvider):
-    """Search via the ``ddgs`` package (DuckDuckGo HTML scrape).
-
-    No API key required.  The provider is considered "configured" when the
-    ``ddgs`` package is importable — there is nothing else to set up.
-    """
-
-    def provider_name(self) -> str:
-        return "ddgs"
-
-    def is_configured(self) -> bool:
-        """Return True when the ``ddgs`` package is importable.
-
-        Called at tool-registration time; must not perform network I/O.
-        """
-        try:
-            import ddgs  # noqa: F401
-            return True
-        except ImportError:
-            return False
-
-    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
-        """Execute a DuckDuckGo search and return normalized results.
-
-        Returns ``{"success": True, "data": {"web": [...]}}`` on success or
-        ``{"success": False, "error": str}`` on failure (missing package,
-        rate-limited, network error, etc.).
-        """
-        try:
-            from ddgs import DDGS  # type: ignore
-        except ImportError:
-            return {
-                "success": False,
-                "error": "ddgs package is not installed — run `pip install ddgs`",
-            }
-
-        # DDGS().text yields at most `max_results` items; we cap defensively
-        # in case the package ignores the hint.
-        safe_limit = max(1, int(limit))
-
-        try:
-            web_results = []
-            with DDGS() as client:
-                for i, hit in enumerate(client.text(query, max_results=safe_limit)):
-                    if i >= safe_limit:
-                        break
-                    url = str(hit.get("href") or hit.get("url") or "")
-                    web_results.append(
-                        {
-                            "title": str(hit.get("title", "")),
-                            "url": url,
-                            "description": str(hit.get("body", "")),
-                            "position": i + 1,
-                        }
-                    )
-        except Exception as exc:  # noqa: BLE001 — ddgs raises its own exceptions
-            logger.warning("DDGS search error: %s", exc)
-            return {"success": False, "error": f"DuckDuckGo search failed: {exc}"}
-
-        logger.info("DDGS search '%s': %d results (limit %d)", query, len(web_results), limit)
-        return {"success": True, "data": {"web": web_results}}
--- a/tools/web_providers/searxng.py
+++ b/tools/web_providers/searxng.py
@ -1,132 +0,0 @@
-"""SearXNG web search provider.
-
-SearXNG is a free, self-hosted, privacy-respecting metasearch engine.
-It implements ``WebSearchProvider`` only — there is no extract capability.
-
-Configuration::
-
-    # ~/.hermes/.env
-    SEARXNG_URL=http://localhost:8080
-
-    # Use SearXNG for search, pair with any extract provider:
-    # ~/.hermes/config.yaml
-    web:
-      search_backend: "searxng"
-      extract_backend: "firecrawl"
-
-Public SearXNG instances are listed at https://searx.space/ but self-hosting
-is recommended for production use (rate limits and availability vary per
-public instance).
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-from typing import Any, Dict
-
-from tools.web_providers.base import WebSearchProvider
-
-logger = logging.getLogger(__name__)
-
-
-class SearXNGSearchProvider(WebSearchProvider):
-    """Search via a SearXNG instance.
-
-    Requires ``SEARXNG_URL`` to be set (e.g. ``http://localhost:8080``).
-    No API key needed — SearXNG is open-source and self-hosted.
-
-    Uses the SearXNG JSON API (``/search?format=json``).  Results are
-    sorted by SearXNG's own score and truncated to *limit*.
-    """
-
-    def provider_name(self) -> str:
-        return "searxng"
-
-    def is_configured(self) -> bool:
-        """Return True when ``SEARXNG_URL`` is set to a non-empty value."""
-        return bool(os.getenv("SEARXNG_URL", "").strip())
-
-    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
-        """Execute a search against the configured SearXNG instance.
-
-        Returns normalized results::
-
-            {
-                "success": True,
-                "data": {
-                    "web": [
-                        {
-                            "title": str,
-                            "url": str,
-                            "description": str,
-                            "position": int,
-                        },
-                        ...
-                    ]
-                }
-            }
-
-        On failure returns ``{"success": False, "error": str}``.
-        """
-        import httpx
-
-        base_url = os.getenv("SEARXNG_URL", "").strip().rstrip("/")
-        if not base_url:
-            return {"success": False, "error": "SEARXNG_URL is not set"}
-
-        params: Dict[str, Any] = {
-            "q": query,
-            "format": "json",
-            "pageno": 1,
-        }
-
-        try:
-            resp = httpx.get(
-                f"{base_url}/search",
-                params=params,
-                timeout=15,
-                headers={"Accept": "application/json"},
-            )
-            resp.raise_for_status()
-        except httpx.HTTPStatusError as exc:
-            logger.warning("SearXNG HTTP error: %s", exc)
-            return {"success": False, "error": f"SearXNG returned HTTP {exc.response.status_code}"}
-        except httpx.RequestError as exc:
-            logger.warning("SearXNG request error: %s", exc)
-            return {"success": False, "error": f"Could not reach SearXNG at {base_url}: {exc}"}
-
-        try:
-            data = resp.json()
-        except Exception as exc:  # noqa: BLE001
-            logger.warning("SearXNG response parse error: %s", exc)
-            return {"success": False, "error": "Could not parse SearXNG response as JSON"}
-
-        raw_results = data.get("results", [])
-
-        # SearXNG may return a score field; sort descending and cap to limit.
-        sorted_results = sorted(
-            raw_results,
-            key=lambda r: float(r.get("score", 0)),
-            reverse=True,
-        )[:limit]
-
-        web_results = [
-            {
-                "title": str(r.get("title", "")),
-                "url": str(r.get("url", "")),
-                "description": str(r.get("content", "")),
-                "position": i + 1,
-            }
-            for i, r in enumerate(sorted_results)
-        ]
-
-        logger.info(
-            "SearXNG search '%s': %d results (from %d raw, limit %d)",
-            query,
-            len(web_results),
-            len(raw_results),
-            limit,
-        )
-
-        return {"success": True, "data": {"web": web_results}}