From 39b4ebfceaeeb56d1c197dd22028053e5c2c1190 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Thu, 14 May 2026 00:56:11 +0530 Subject: [PATCH] refactor(web): delete legacy tools/web_providers/ directory + migrate ABC tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes the legacy in-tree provider scaffolding that PR #25182 fully replaced with the plugin architecture: tools/web_providers/__init__.py (6 lines) tools/web_providers/base.py (89 lines — old ABCs) tools/web_providers/ARCHITECTURE.md (73 lines — old design doc) These were the staging-ground ABCs and provider modules that the plugin migration absorbed. All seven web providers now implement the single :class:`agent.web_search_provider.WebSearchProvider` ABC and live under ``plugins/web//``. Nothing else in the tree imports ``tools.web_providers`` — verified via grep before deletion. Test migration (tests/tools/test_web_providers.py) -------------------------------------------------- Rewrote ``TestWebProviderABCs`` to test the new unified ABC at :mod:`agent.web_search_provider`: - test_cannot_instantiate_abc_directly — abstract ``name`` + ``is_available`` - test_concrete_search_only_provider_works — exercise default ``supports_extract=False`` / ``supports_crawl=False`` flags - test_concrete_multi_capability_provider_works — exercise all three capabilities, async extract supported (declared sync here for simplicity; real plugins like parallel + firecrawl use async) - test_search_only_provider_skips_extract_and_crawl — verify ``supports_*()`` flags default to False so search-only providers don't have to implement extract() or crawl() The 9 other tests in the file (per-capability backend selection, DEFAULT_CONFIG merge, dispatcher routing) test public helpers in ``tools.web_tools`` that still exist and pass unchanged. agent/web_search_provider.py docstring updated to reflect that the legacy ABCs no longer exist; the response-shape contract is preserved bit-for-bit so external consumers see no behavioral change. Net diff -------- - tools/web_providers/ removed (-168 lines) - tests/tools/test_web_providers.py rewritten ABC section (+78/-30 net, same coverage, new API) - agent/web_search_provider.py docstring (-3/+5 lines) Verified -------- - 173/173 targeted web tests pass - 12/12 ABC contract tests pass with the new interface - No remaining grep hits for ``tools.web_providers`` outside of intentional historical references in plugin docstrings. --- agent/web_search_provider.py | 13 +-- tests/tools/test_web_providers.py | 119 ++++++++++++++++++++++------ tools/web_providers/ARCHITECTURE.md | 73 ----------------- tools/web_providers/__init__.py | 6 -- tools/web_providers/base.py | 89 --------------------- 5 files changed, 102 insertions(+), 198 deletions(-) delete mode 100644 tools/web_providers/ARCHITECTURE.md delete mode 100644 tools/web_providers/__init__.py delete mode 100644 tools/web_providers/base.py diff --git a/agent/web_search_provider.py b/agent/web_search_provider.py index 0e8b31547fa..ed3f79f270e 100644 --- a/agent/web_search_provider.py +++ b/agent/web_search_provider.py @@ -12,13 +12,14 @@ Providers live in ``/plugins/web//`` (built-in, auto-loaded as ``kind: backend``) or ``~/.hermes/plugins/web//`` (user, opt-in via ``plugins.enabled``). -This ABC is the plugin-facing surface. The legacy -:mod:`tools.web_providers.base` module retains its own ABCs for in-tree -consumers that haven't migrated yet; over time those will all flow through -this provider. +This ABC is the SINGLE plugin-facing surface for web providers — every +provider in the tree (brave-free, ddgs, searxng, exa, parallel, tavily, +firecrawl) implements it. The legacy in-tree ``tools.web_providers.base`` +ABCs were deleted in PR #25182 along with the per-vendor inline helpers +in ``tools/web_tools.py``; the response-shape contract documented below +is preserved bit-for-bit so the tool wrapper does not have to translate. -Response shape (mirrors the legacy contract in ``tools/web_providers/base.py`` -so the tool wrapper does not have to translate): +Response shape (preserved from the legacy contract): Search results:: diff --git a/tests/tools/test_web_providers.py b/tests/tools/test_web_providers.py index 3c0abb307b0..c64b0a1b621 100644 --- a/tests/tools/test_web_providers.py +++ b/tests/tools/test_web_providers.py @@ -20,50 +20,121 @@ import pytest class TestWebProviderABCs: - """The ABCs enforce the interface contract.""" + """The unified WebSearchProvider ABC enforces the interface contract. - def test_cannot_instantiate_search_provider(self): - from tools.web_providers.base import WebSearchProvider + After PR #25182, all seven providers are subclasses of + :class:`agent.web_search_provider.WebSearchProvider`. The legacy + in-tree ABCs at ``tools.web_providers.base`` (separate + ``WebSearchProvider`` + ``WebExtractProvider``) were deleted in the + same PR — providers now advertise capabilities via + ``supports_search() / supports_extract() / supports_crawl()`` flags. + """ + + def test_cannot_instantiate_abc_directly(self): + from agent.web_search_provider import WebSearchProvider with pytest.raises(TypeError): WebSearchProvider() # type: ignore[abstract] - def test_cannot_instantiate_extract_provider(self): - from tools.web_providers.base import WebExtractProvider - - with pytest.raises(TypeError): - WebExtractProvider() # type: ignore[abstract] - - def test_concrete_search_provider_works(self): - from tools.web_providers.base import WebSearchProvider + def test_concrete_search_only_provider_works(self): + from agent.web_search_provider import WebSearchProvider class Dummy(WebSearchProvider): - def provider_name(self) -> str: + @property + def name(self) -> str: return "dummy" - def is_configured(self) -> bool: + + @property + def display_name(self) -> str: + return "Dummy Search" + + def is_available(self) -> bool: return True + + def supports_search(self) -> bool: + return True + def search(self, query: str, limit: int = 5) -> Dict[str, Any]: return {"success": True, "data": {"web": []}} d = Dummy() - assert d.provider_name() == "dummy" - assert d.is_configured() is True + assert d.name == "dummy" + assert d.display_name == "Dummy Search" + assert d.is_available() is True + assert d.supports_search() is True + assert d.supports_extract() is False # default + assert d.supports_crawl() is False # default assert d.search("test")["success"] is True - def test_concrete_extract_provider_works(self): - from tools.web_providers.base import WebExtractProvider + def test_concrete_multi_capability_provider_works(self): + from agent.web_search_provider import WebSearchProvider - class Dummy(WebExtractProvider): - def provider_name(self) -> str: + class Dummy(WebSearchProvider): + @property + def name(self) -> str: return "dummy" - def is_configured(self) -> bool: + + @property + def display_name(self) -> str: + return "Dummy Multi" + + def is_available(self) -> bool: return True - def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]: - return {"success": True, "data": [{"url": urls[0], "content": "x"}]} + + def supports_search(self) -> bool: + return True + + def supports_extract(self) -> bool: + return True + + def supports_crawl(self) -> bool: + return True + + def search(self, query: str, limit: int = 5) -> Dict[str, Any]: + return {"success": True, "data": {"web": []}} + + def extract(self, urls: List[str], **kwargs: Any) -> List[Dict[str, Any]]: + return [{"url": urls[0], "content": "x"}] + + def crawl(self, url: str, **kwargs: Any) -> Dict[str, Any]: + return {"results": [{"url": url, "content": "x"}]} d = Dummy() - assert d.provider_name() == "dummy" - assert d.extract(["https://example.com"])["success"] is True + assert d.supports_search() is True + assert d.supports_extract() is True + assert d.supports_crawl() is True + assert d.extract(["https://example.com"])[0]["url"] == "https://example.com" + assert d.crawl("https://example.com")["results"][0]["url"] == "https://example.com" + + def test_search_only_provider_skips_extract_and_crawl(self): + """Search-only providers don't have to implement extract() / crawl().""" + from agent.web_search_provider import WebSearchProvider + + class SearchOnly(WebSearchProvider): + @property + def name(self) -> str: + return "search-only" + + @property + def display_name(self) -> str: + return "Search Only" + + def is_available(self) -> bool: + return True + + def supports_search(self) -> bool: + return True + + def search(self, query: str, limit: int = 5) -> Dict[str, Any]: + return {"success": True, "data": {"web": []}} + + # Should instantiate fine — extract/crawl have default + # supports_*() returning False and aren't required to be + # overridden when not advertised. + s = SearchOnly() + assert s.supports_search() is True + assert s.supports_extract() is False + assert s.supports_crawl() is False # --------------------------------------------------------------------------- diff --git a/tools/web_providers/ARCHITECTURE.md b/tools/web_providers/ARCHITECTURE.md deleted file mode 100644 index f4a7b335e87..00000000000 --- a/tools/web_providers/ARCHITECTURE.md +++ /dev/null @@ -1,73 +0,0 @@ -# Web Tools Provider Architecture - -## Overview - -Web tools (`web_search`, `web_extract`) use a **per-capability backend selection** system that allows different providers for search and extract independently. - -## Config Keys - -```yaml -web: - backend: "firecrawl" # Shared fallback — applies to both if specific keys not set - search_backend: "" # Per-capability override for web_search - extract_backend: "" # Per-capability override for web_extract -``` - -**Selection priority (per capability):** -1. `web.search_backend` / `web.extract_backend` (explicit per-capability) -2. `web.backend` (shared fallback) -3. Auto-detect from environment variables - -When per-capability keys are empty (default), behavior is identical to the legacy single-backend selection. - -## Architecture - -``` -web_search_tool() - └─ _get_search_backend() - ├─ web.search_backend (if set + available) - └─ _get_backend() fallback - -web_extract_tool() - └─ _get_extract_backend() - ├─ web.extract_backend (if set + available) - └─ _get_backend() fallback -``` - -## Provider ABCs - -New providers implement these interfaces in `tools/web_providers/`: - -```python -from tools.web_providers.base import WebSearchProvider, WebExtractProvider - -class MySearchProvider(WebSearchProvider): - def provider_name(self) -> str: ... - def is_configured(self) -> bool: ... - def search(self, query: str, limit: int = 5) -> Dict[str, Any]: ... - -class MyExtractProvider(WebExtractProvider): - def provider_name(self) -> str: ... - def is_configured(self) -> bool: ... - def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]: ... -``` - -## Adding a New Search Provider - -1. Create `tools/web_providers/your_provider.py` implementing `WebSearchProvider` -2. Add availability check to `_is_backend_available()` in `web_tools.py` -3. Add dispatch branch in `web_search_tool()` -4. Add provider to `hermes tools` picker in `tools_config.py` -5. Add env var to `OPTIONAL_ENV_VARS` in `config.py` (if needed) -6. Write tests in `tests/tools/` - -Search-only providers (like SearXNG) don't need to implement `WebExtractProvider`. -Extract-only providers don't need to implement `WebSearchProvider`. - -## hermes tools UX - -The provider picker uses **progressive disclosure**: -- **Default path** (90% of users): Pick one provider → sets `web.backend` for both. One selection, done. -- **Advanced path**: "Configure separately" option at bottom → two-step sub-picker for search + extract independently. - -See `.hermes/plans/2026-05-03-web-tools-provider-architecture.md` for the full UX flow diagram. diff --git a/tools/web_providers/__init__.py b/tools/web_providers/__init__.py deleted file mode 100644 index 15134175d21..00000000000 --- a/tools/web_providers/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Web capability providers — search, extract, crawl. - -Each capability has an ABC in ``base.py`` and vendor implementations in -sibling modules. Provider registries in ``web_tools.py`` map config names -to provider classes. -""" diff --git a/tools/web_providers/base.py b/tools/web_providers/base.py deleted file mode 100644 index 21772189191..00000000000 --- a/tools/web_providers/base.py +++ /dev/null @@ -1,89 +0,0 @@ -"""Abstract base classes for web capability providers.""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import Any, Dict, List - - -class WebSearchProvider(ABC): - """Interface for web search backends (Firecrawl, Tavily, Exa, etc.). - - Implementations live in sibling modules. The user selects a provider - via ``hermes tools``; the choice is persisted as - ``config["web"]["search_backend"]`` (falling back to - ``config["web"]["backend"]``). - - Search providers return results in a normalized format:: - - { - "success": True, - "data": { - "web": [ - {"title": str, "url": str, "description": str, "position": int}, - ... - ] - } - } - - On failure:: - - {"success": False, "error": str} - """ - - @abstractmethod - def provider_name(self) -> str: - """Short, human-readable name shown in logs and diagnostics.""" - - @abstractmethod - def is_configured(self) -> bool: - """Return True when all required env vars / credentials are present. - - Called at tool-registration time to gate availability. - Must be cheap — no network calls. - """ - - @abstractmethod - def search(self, query: str, limit: int = 5) -> Dict[str, Any]: - """Execute a web search and return normalized results.""" - - -class WebExtractProvider(ABC): - """Interface for web content extraction backends. - - Implementations live in sibling modules. The user selects a provider - via ``hermes tools``; the choice is persisted as - ``config["web"]["extract_backend"]`` (falling back to - ``config["web"]["backend"]``). - - Extract providers return results in a normalized format:: - - { - "success": True, - "data": [ - {"url": str, "title": str, "content": str, - "raw_content": str, "metadata": dict}, - ... - ] - } - - On failure:: - - {"success": False, "error": str} - """ - - @abstractmethod - def provider_name(self) -> str: - """Short, human-readable name shown in logs and diagnostics.""" - - @abstractmethod - def is_configured(self) -> bool: - """Return True when all required env vars / credentials are present. - - Called at tool-registration time to gate availability. - Must be cheap — no network calls. - """ - - @abstractmethod - def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]: - """Extract content from the given URLs and return normalized results."""