diff --git a/agent/web_search_provider.py b/agent/web_search_provider.py index 0e8b31547fa..ed3f79f270e 100644 --- a/agent/web_search_provider.py +++ b/agent/web_search_provider.py @@ -12,13 +12,14 @@ Providers live in ``/plugins/web//`` (built-in, auto-loaded as ``kind: backend``) or ``~/.hermes/plugins/web//`` (user, opt-in via ``plugins.enabled``). -This ABC is the plugin-facing surface. The legacy -:mod:`tools.web_providers.base` module retains its own ABCs for in-tree -consumers that haven't migrated yet; over time those will all flow through -this provider. +This ABC is the SINGLE plugin-facing surface for web providers — every +provider in the tree (brave-free, ddgs, searxng, exa, parallel, tavily, +firecrawl) implements it. The legacy in-tree ``tools.web_providers.base`` +ABCs were deleted in PR #25182 along with the per-vendor inline helpers +in ``tools/web_tools.py``; the response-shape contract documented below +is preserved bit-for-bit so the tool wrapper does not have to translate. -Response shape (mirrors the legacy contract in ``tools/web_providers/base.py`` -so the tool wrapper does not have to translate): +Response shape (preserved from the legacy contract): Search results:: diff --git a/tests/tools/test_web_providers.py b/tests/tools/test_web_providers.py index 3c0abb307b0..c64b0a1b621 100644 --- a/tests/tools/test_web_providers.py +++ b/tests/tools/test_web_providers.py @@ -20,50 +20,121 @@ import pytest class TestWebProviderABCs: - """The ABCs enforce the interface contract.""" + """The unified WebSearchProvider ABC enforces the interface contract. - def test_cannot_instantiate_search_provider(self): - from tools.web_providers.base import WebSearchProvider + After PR #25182, all seven providers are subclasses of + :class:`agent.web_search_provider.WebSearchProvider`. The legacy + in-tree ABCs at ``tools.web_providers.base`` (separate + ``WebSearchProvider`` + ``WebExtractProvider``) were deleted in the + same PR — providers now advertise capabilities via + ``supports_search() / supports_extract() / supports_crawl()`` flags. + """ + + def test_cannot_instantiate_abc_directly(self): + from agent.web_search_provider import WebSearchProvider with pytest.raises(TypeError): WebSearchProvider() # type: ignore[abstract] - def test_cannot_instantiate_extract_provider(self): - from tools.web_providers.base import WebExtractProvider - - with pytest.raises(TypeError): - WebExtractProvider() # type: ignore[abstract] - - def test_concrete_search_provider_works(self): - from tools.web_providers.base import WebSearchProvider + def test_concrete_search_only_provider_works(self): + from agent.web_search_provider import WebSearchProvider class Dummy(WebSearchProvider): - def provider_name(self) -> str: + @property + def name(self) -> str: return "dummy" - def is_configured(self) -> bool: + + @property + def display_name(self) -> str: + return "Dummy Search" + + def is_available(self) -> bool: return True + + def supports_search(self) -> bool: + return True + def search(self, query: str, limit: int = 5) -> Dict[str, Any]: return {"success": True, "data": {"web": []}} d = Dummy() - assert d.provider_name() == "dummy" - assert d.is_configured() is True + assert d.name == "dummy" + assert d.display_name == "Dummy Search" + assert d.is_available() is True + assert d.supports_search() is True + assert d.supports_extract() is False # default + assert d.supports_crawl() is False # default assert d.search("test")["success"] is True - def test_concrete_extract_provider_works(self): - from tools.web_providers.base import WebExtractProvider + def test_concrete_multi_capability_provider_works(self): + from agent.web_search_provider import WebSearchProvider - class Dummy(WebExtractProvider): - def provider_name(self) -> str: + class Dummy(WebSearchProvider): + @property + def name(self) -> str: return "dummy" - def is_configured(self) -> bool: + + @property + def display_name(self) -> str: + return "Dummy Multi" + + def is_available(self) -> bool: return True - def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]: - return {"success": True, "data": [{"url": urls[0], "content": "x"}]} + + def supports_search(self) -> bool: + return True + + def supports_extract(self) -> bool: + return True + + def supports_crawl(self) -> bool: + return True + + def search(self, query: str, limit: int = 5) -> Dict[str, Any]: + return {"success": True, "data": {"web": []}} + + def extract(self, urls: List[str], **kwargs: Any) -> List[Dict[str, Any]]: + return [{"url": urls[0], "content": "x"}] + + def crawl(self, url: str, **kwargs: Any) -> Dict[str, Any]: + return {"results": [{"url": url, "content": "x"}]} d = Dummy() - assert d.provider_name() == "dummy" - assert d.extract(["https://example.com"])["success"] is True + assert d.supports_search() is True + assert d.supports_extract() is True + assert d.supports_crawl() is True + assert d.extract(["https://example.com"])[0]["url"] == "https://example.com" + assert d.crawl("https://example.com")["results"][0]["url"] == "https://example.com" + + def test_search_only_provider_skips_extract_and_crawl(self): + """Search-only providers don't have to implement extract() / crawl().""" + from agent.web_search_provider import WebSearchProvider + + class SearchOnly(WebSearchProvider): + @property + def name(self) -> str: + return "search-only" + + @property + def display_name(self) -> str: + return "Search Only" + + def is_available(self) -> bool: + return True + + def supports_search(self) -> bool: + return True + + def search(self, query: str, limit: int = 5) -> Dict[str, Any]: + return {"success": True, "data": {"web": []}} + + # Should instantiate fine — extract/crawl have default + # supports_*() returning False and aren't required to be + # overridden when not advertised. + s = SearchOnly() + assert s.supports_search() is True + assert s.supports_extract() is False + assert s.supports_crawl() is False # --------------------------------------------------------------------------- diff --git a/tools/web_providers/ARCHITECTURE.md b/tools/web_providers/ARCHITECTURE.md deleted file mode 100644 index f4a7b335e87..00000000000 --- a/tools/web_providers/ARCHITECTURE.md +++ /dev/null @@ -1,73 +0,0 @@ -# Web Tools Provider Architecture - -## Overview - -Web tools (`web_search`, `web_extract`) use a **per-capability backend selection** system that allows different providers for search and extract independently. - -## Config Keys - -```yaml -web: - backend: "firecrawl" # Shared fallback — applies to both if specific keys not set - search_backend: "" # Per-capability override for web_search - extract_backend: "" # Per-capability override for web_extract -``` - -**Selection priority (per capability):** -1. `web.search_backend` / `web.extract_backend` (explicit per-capability) -2. `web.backend` (shared fallback) -3. Auto-detect from environment variables - -When per-capability keys are empty (default), behavior is identical to the legacy single-backend selection. - -## Architecture - -``` -web_search_tool() - └─ _get_search_backend() - ├─ web.search_backend (if set + available) - └─ _get_backend() fallback - -web_extract_tool() - └─ _get_extract_backend() - ├─ web.extract_backend (if set + available) - └─ _get_backend() fallback -``` - -## Provider ABCs - -New providers implement these interfaces in `tools/web_providers/`: - -```python -from tools.web_providers.base import WebSearchProvider, WebExtractProvider - -class MySearchProvider(WebSearchProvider): - def provider_name(self) -> str: ... - def is_configured(self) -> bool: ... - def search(self, query: str, limit: int = 5) -> Dict[str, Any]: ... - -class MyExtractProvider(WebExtractProvider): - def provider_name(self) -> str: ... - def is_configured(self) -> bool: ... - def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]: ... -``` - -## Adding a New Search Provider - -1. Create `tools/web_providers/your_provider.py` implementing `WebSearchProvider` -2. Add availability check to `_is_backend_available()` in `web_tools.py` -3. Add dispatch branch in `web_search_tool()` -4. Add provider to `hermes tools` picker in `tools_config.py` -5. Add env var to `OPTIONAL_ENV_VARS` in `config.py` (if needed) -6. Write tests in `tests/tools/` - -Search-only providers (like SearXNG) don't need to implement `WebExtractProvider`. -Extract-only providers don't need to implement `WebSearchProvider`. - -## hermes tools UX - -The provider picker uses **progressive disclosure**: -- **Default path** (90% of users): Pick one provider → sets `web.backend` for both. One selection, done. -- **Advanced path**: "Configure separately" option at bottom → two-step sub-picker for search + extract independently. - -See `.hermes/plans/2026-05-03-web-tools-provider-architecture.md` for the full UX flow diagram. diff --git a/tools/web_providers/__init__.py b/tools/web_providers/__init__.py deleted file mode 100644 index 15134175d21..00000000000 --- a/tools/web_providers/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Web capability providers — search, extract, crawl. - -Each capability has an ABC in ``base.py`` and vendor implementations in -sibling modules. Provider registries in ``web_tools.py`` map config names -to provider classes. -""" diff --git a/tools/web_providers/base.py b/tools/web_providers/base.py deleted file mode 100644 index 21772189191..00000000000 --- a/tools/web_providers/base.py +++ /dev/null @@ -1,89 +0,0 @@ -"""Abstract base classes for web capability providers.""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import Any, Dict, List - - -class WebSearchProvider(ABC): - """Interface for web search backends (Firecrawl, Tavily, Exa, etc.). - - Implementations live in sibling modules. The user selects a provider - via ``hermes tools``; the choice is persisted as - ``config["web"]["search_backend"]`` (falling back to - ``config["web"]["backend"]``). - - Search providers return results in a normalized format:: - - { - "success": True, - "data": { - "web": [ - {"title": str, "url": str, "description": str, "position": int}, - ... - ] - } - } - - On failure:: - - {"success": False, "error": str} - """ - - @abstractmethod - def provider_name(self) -> str: - """Short, human-readable name shown in logs and diagnostics.""" - - @abstractmethod - def is_configured(self) -> bool: - """Return True when all required env vars / credentials are present. - - Called at tool-registration time to gate availability. - Must be cheap — no network calls. - """ - - @abstractmethod - def search(self, query: str, limit: int = 5) -> Dict[str, Any]: - """Execute a web search and return normalized results.""" - - -class WebExtractProvider(ABC): - """Interface for web content extraction backends. - - Implementations live in sibling modules. The user selects a provider - via ``hermes tools``; the choice is persisted as - ``config["web"]["extract_backend"]`` (falling back to - ``config["web"]["backend"]``). - - Extract providers return results in a normalized format:: - - { - "success": True, - "data": [ - {"url": str, "title": str, "content": str, - "raw_content": str, "metadata": dict}, - ... - ] - } - - On failure:: - - {"success": False, "error": str} - """ - - @abstractmethod - def provider_name(self) -> str: - """Short, human-readable name shown in logs and diagnostics.""" - - @abstractmethod - def is_configured(self) -> bool: - """Return True when all required env vars / credentials are present. - - Called at tool-registration time to gate availability. - Must be cheap — no network calls. - """ - - @abstractmethod - def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]: - """Extract content from the given URLs and return normalized results."""