mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-25 05:52:34 +00:00
refactor(web): delete legacy tools/web_providers/ directory + migrate ABC tests
Removes the legacy in-tree provider scaffolding that PR #25182 fully replaced with the plugin architecture: tools/web_providers/__init__.py (6 lines) tools/web_providers/base.py (89 lines — old ABCs) tools/web_providers/ARCHITECTURE.md (73 lines — old design doc) These were the staging-ground ABCs and provider modules that the plugin migration absorbed. All seven web providers now implement the single :class:`agent.web_search_provider.WebSearchProvider` ABC and live under ``plugins/web/<vendor>/``. Nothing else in the tree imports ``tools.web_providers`` — verified via grep before deletion. Test migration (tests/tools/test_web_providers.py) -------------------------------------------------- Rewrote ``TestWebProviderABCs`` to test the new unified ABC at :mod:`agent.web_search_provider`: - test_cannot_instantiate_abc_directly — abstract ``name`` + ``is_available`` - test_concrete_search_only_provider_works — exercise default ``supports_extract=False`` / ``supports_crawl=False`` flags - test_concrete_multi_capability_provider_works — exercise all three capabilities, async extract supported (declared sync here for simplicity; real plugins like parallel + firecrawl use async) - test_search_only_provider_skips_extract_and_crawl — verify ``supports_*()`` flags default to False so search-only providers don't have to implement extract() or crawl() The 9 other tests in the file (per-capability backend selection, DEFAULT_CONFIG merge, dispatcher routing) test public helpers in ``tools.web_tools`` that still exist and pass unchanged. agent/web_search_provider.py docstring updated to reflect that the legacy ABCs no longer exist; the response-shape contract is preserved bit-for-bit so external consumers see no behavioral change. Net diff -------- - tools/web_providers/ removed (-168 lines) - tests/tools/test_web_providers.py rewritten ABC section (+78/-30 net, same coverage, new API) - agent/web_search_provider.py docstring (-3/+5 lines) Verified -------- - 173/173 targeted web tests pass - 12/12 ABC contract tests pass with the new interface - No remaining grep hits for ``tools.web_providers`` outside of intentional historical references in plugin docstrings.
This commit is contained in:
parent
24fe60faa2
commit
39b4ebfcea
5 changed files with 102 additions and 198 deletions
|
|
@ -12,13 +12,14 @@ Providers live in ``<repo>/plugins/web/<name>/`` (built-in, auto-loaded as
|
||||||
``kind: backend``) or ``~/.hermes/plugins/web/<name>/`` (user, opt-in via
|
``kind: backend``) or ``~/.hermes/plugins/web/<name>/`` (user, opt-in via
|
||||||
``plugins.enabled``).
|
``plugins.enabled``).
|
||||||
|
|
||||||
This ABC is the plugin-facing surface. The legacy
|
This ABC is the SINGLE plugin-facing surface for web providers — every
|
||||||
:mod:`tools.web_providers.base` module retains its own ABCs for in-tree
|
provider in the tree (brave-free, ddgs, searxng, exa, parallel, tavily,
|
||||||
consumers that haven't migrated yet; over time those will all flow through
|
firecrawl) implements it. The legacy in-tree ``tools.web_providers.base``
|
||||||
this provider.
|
ABCs were deleted in PR #25182 along with the per-vendor inline helpers
|
||||||
|
in ``tools/web_tools.py``; the response-shape contract documented below
|
||||||
|
is preserved bit-for-bit so the tool wrapper does not have to translate.
|
||||||
|
|
||||||
Response shape (mirrors the legacy contract in ``tools/web_providers/base.py``
|
Response shape (preserved from the legacy contract):
|
||||||
so the tool wrapper does not have to translate):
|
|
||||||
|
|
||||||
Search results::
|
Search results::
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,50 +20,121 @@ import pytest
|
||||||
|
|
||||||
|
|
||||||
class TestWebProviderABCs:
|
class TestWebProviderABCs:
|
||||||
"""The ABCs enforce the interface contract."""
|
"""The unified WebSearchProvider ABC enforces the interface contract.
|
||||||
|
|
||||||
def test_cannot_instantiate_search_provider(self):
|
After PR #25182, all seven providers are subclasses of
|
||||||
from tools.web_providers.base import WebSearchProvider
|
:class:`agent.web_search_provider.WebSearchProvider`. The legacy
|
||||||
|
in-tree ABCs at ``tools.web_providers.base`` (separate
|
||||||
|
``WebSearchProvider`` + ``WebExtractProvider``) were deleted in the
|
||||||
|
same PR — providers now advertise capabilities via
|
||||||
|
``supports_search() / supports_extract() / supports_crawl()`` flags.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_cannot_instantiate_abc_directly(self):
|
||||||
|
from agent.web_search_provider import WebSearchProvider
|
||||||
|
|
||||||
with pytest.raises(TypeError):
|
with pytest.raises(TypeError):
|
||||||
WebSearchProvider() # type: ignore[abstract]
|
WebSearchProvider() # type: ignore[abstract]
|
||||||
|
|
||||||
def test_cannot_instantiate_extract_provider(self):
|
def test_concrete_search_only_provider_works(self):
|
||||||
from tools.web_providers.base import WebExtractProvider
|
from agent.web_search_provider import WebSearchProvider
|
||||||
|
|
||||||
with pytest.raises(TypeError):
|
|
||||||
WebExtractProvider() # type: ignore[abstract]
|
|
||||||
|
|
||||||
def test_concrete_search_provider_works(self):
|
|
||||||
from tools.web_providers.base import WebSearchProvider
|
|
||||||
|
|
||||||
class Dummy(WebSearchProvider):
|
class Dummy(WebSearchProvider):
|
||||||
def provider_name(self) -> str:
|
@property
|
||||||
|
def name(self) -> str:
|
||||||
return "dummy"
|
return "dummy"
|
||||||
def is_configured(self) -> bool:
|
|
||||||
|
@property
|
||||||
|
def display_name(self) -> str:
|
||||||
|
return "Dummy Search"
|
||||||
|
|
||||||
|
def is_available(self) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def supports_search(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
||||||
return {"success": True, "data": {"web": []}}
|
return {"success": True, "data": {"web": []}}
|
||||||
|
|
||||||
d = Dummy()
|
d = Dummy()
|
||||||
assert d.provider_name() == "dummy"
|
assert d.name == "dummy"
|
||||||
assert d.is_configured() is True
|
assert d.display_name == "Dummy Search"
|
||||||
|
assert d.is_available() is True
|
||||||
|
assert d.supports_search() is True
|
||||||
|
assert d.supports_extract() is False # default
|
||||||
|
assert d.supports_crawl() is False # default
|
||||||
assert d.search("test")["success"] is True
|
assert d.search("test")["success"] is True
|
||||||
|
|
||||||
def test_concrete_extract_provider_works(self):
|
def test_concrete_multi_capability_provider_works(self):
|
||||||
from tools.web_providers.base import WebExtractProvider
|
from agent.web_search_provider import WebSearchProvider
|
||||||
|
|
||||||
class Dummy(WebExtractProvider):
|
class Dummy(WebSearchProvider):
|
||||||
def provider_name(self) -> str:
|
@property
|
||||||
|
def name(self) -> str:
|
||||||
return "dummy"
|
return "dummy"
|
||||||
def is_configured(self) -> bool:
|
|
||||||
|
@property
|
||||||
|
def display_name(self) -> str:
|
||||||
|
return "Dummy Multi"
|
||||||
|
|
||||||
|
def is_available(self) -> bool:
|
||||||
return True
|
return True
|
||||||
def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]:
|
|
||||||
return {"success": True, "data": [{"url": urls[0], "content": "x"}]}
|
def supports_search(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
def supports_extract(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
def supports_crawl(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
||||||
|
return {"success": True, "data": {"web": []}}
|
||||||
|
|
||||||
|
def extract(self, urls: List[str], **kwargs: Any) -> List[Dict[str, Any]]:
|
||||||
|
return [{"url": urls[0], "content": "x"}]
|
||||||
|
|
||||||
|
def crawl(self, url: str, **kwargs: Any) -> Dict[str, Any]:
|
||||||
|
return {"results": [{"url": url, "content": "x"}]}
|
||||||
|
|
||||||
d = Dummy()
|
d = Dummy()
|
||||||
assert d.provider_name() == "dummy"
|
assert d.supports_search() is True
|
||||||
assert d.extract(["https://example.com"])["success"] is True
|
assert d.supports_extract() is True
|
||||||
|
assert d.supports_crawl() is True
|
||||||
|
assert d.extract(["https://example.com"])[0]["url"] == "https://example.com"
|
||||||
|
assert d.crawl("https://example.com")["results"][0]["url"] == "https://example.com"
|
||||||
|
|
||||||
|
def test_search_only_provider_skips_extract_and_crawl(self):
|
||||||
|
"""Search-only providers don't have to implement extract() / crawl()."""
|
||||||
|
from agent.web_search_provider import WebSearchProvider
|
||||||
|
|
||||||
|
class SearchOnly(WebSearchProvider):
|
||||||
|
@property
|
||||||
|
def name(self) -> str:
|
||||||
|
return "search-only"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def display_name(self) -> str:
|
||||||
|
return "Search Only"
|
||||||
|
|
||||||
|
def is_available(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
def supports_search(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
||||||
|
return {"success": True, "data": {"web": []}}
|
||||||
|
|
||||||
|
# Should instantiate fine — extract/crawl have default
|
||||||
|
# supports_*() returning False and aren't required to be
|
||||||
|
# overridden when not advertised.
|
||||||
|
s = SearchOnly()
|
||||||
|
assert s.supports_search() is True
|
||||||
|
assert s.supports_extract() is False
|
||||||
|
assert s.supports_crawl() is False
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -1,73 +0,0 @@
|
||||||
# Web Tools Provider Architecture
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
Web tools (`web_search`, `web_extract`) use a **per-capability backend selection** system that allows different providers for search and extract independently.
|
|
||||||
|
|
||||||
## Config Keys
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
web:
|
|
||||||
backend: "firecrawl" # Shared fallback — applies to both if specific keys not set
|
|
||||||
search_backend: "" # Per-capability override for web_search
|
|
||||||
extract_backend: "" # Per-capability override for web_extract
|
|
||||||
```
|
|
||||||
|
|
||||||
**Selection priority (per capability):**
|
|
||||||
1. `web.search_backend` / `web.extract_backend` (explicit per-capability)
|
|
||||||
2. `web.backend` (shared fallback)
|
|
||||||
3. Auto-detect from environment variables
|
|
||||||
|
|
||||||
When per-capability keys are empty (default), behavior is identical to the legacy single-backend selection.
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
```
|
|
||||||
web_search_tool()
|
|
||||||
└─ _get_search_backend()
|
|
||||||
├─ web.search_backend (if set + available)
|
|
||||||
└─ _get_backend() fallback
|
|
||||||
|
|
||||||
web_extract_tool()
|
|
||||||
└─ _get_extract_backend()
|
|
||||||
├─ web.extract_backend (if set + available)
|
|
||||||
└─ _get_backend() fallback
|
|
||||||
```
|
|
||||||
|
|
||||||
## Provider ABCs
|
|
||||||
|
|
||||||
New providers implement these interfaces in `tools/web_providers/`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from tools.web_providers.base import WebSearchProvider, WebExtractProvider
|
|
||||||
|
|
||||||
class MySearchProvider(WebSearchProvider):
|
|
||||||
def provider_name(self) -> str: ...
|
|
||||||
def is_configured(self) -> bool: ...
|
|
||||||
def search(self, query: str, limit: int = 5) -> Dict[str, Any]: ...
|
|
||||||
|
|
||||||
class MyExtractProvider(WebExtractProvider):
|
|
||||||
def provider_name(self) -> str: ...
|
|
||||||
def is_configured(self) -> bool: ...
|
|
||||||
def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]: ...
|
|
||||||
```
|
|
||||||
|
|
||||||
## Adding a New Search Provider
|
|
||||||
|
|
||||||
1. Create `tools/web_providers/your_provider.py` implementing `WebSearchProvider`
|
|
||||||
2. Add availability check to `_is_backend_available()` in `web_tools.py`
|
|
||||||
3. Add dispatch branch in `web_search_tool()`
|
|
||||||
4. Add provider to `hermes tools` picker in `tools_config.py`
|
|
||||||
5. Add env var to `OPTIONAL_ENV_VARS` in `config.py` (if needed)
|
|
||||||
6. Write tests in `tests/tools/`
|
|
||||||
|
|
||||||
Search-only providers (like SearXNG) don't need to implement `WebExtractProvider`.
|
|
||||||
Extract-only providers don't need to implement `WebSearchProvider`.
|
|
||||||
|
|
||||||
## hermes tools UX
|
|
||||||
|
|
||||||
The provider picker uses **progressive disclosure**:
|
|
||||||
- **Default path** (90% of users): Pick one provider → sets `web.backend` for both. One selection, done.
|
|
||||||
- **Advanced path**: "Configure separately" option at bottom → two-step sub-picker for search + extract independently.
|
|
||||||
|
|
||||||
See `.hermes/plans/2026-05-03-web-tools-provider-architecture.md` for the full UX flow diagram.
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
"""Web capability providers — search, extract, crawl.
|
|
||||||
|
|
||||||
Each capability has an ABC in ``base.py`` and vendor implementations in
|
|
||||||
sibling modules. Provider registries in ``web_tools.py`` map config names
|
|
||||||
to provider classes.
|
|
||||||
"""
|
|
||||||
|
|
@ -1,89 +0,0 @@
|
||||||
"""Abstract base classes for web capability providers."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from typing import Any, Dict, List
|
|
||||||
|
|
||||||
|
|
||||||
class WebSearchProvider(ABC):
|
|
||||||
"""Interface for web search backends (Firecrawl, Tavily, Exa, etc.).
|
|
||||||
|
|
||||||
Implementations live in sibling modules. The user selects a provider
|
|
||||||
via ``hermes tools``; the choice is persisted as
|
|
||||||
``config["web"]["search_backend"]`` (falling back to
|
|
||||||
``config["web"]["backend"]``).
|
|
||||||
|
|
||||||
Search providers return results in a normalized format::
|
|
||||||
|
|
||||||
{
|
|
||||||
"success": True,
|
|
||||||
"data": {
|
|
||||||
"web": [
|
|
||||||
{"title": str, "url": str, "description": str, "position": int},
|
|
||||||
...
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
On failure::
|
|
||||||
|
|
||||||
{"success": False, "error": str}
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def provider_name(self) -> str:
|
|
||||||
"""Short, human-readable name shown in logs and diagnostics."""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def is_configured(self) -> bool:
|
|
||||||
"""Return True when all required env vars / credentials are present.
|
|
||||||
|
|
||||||
Called at tool-registration time to gate availability.
|
|
||||||
Must be cheap — no network calls.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
|
||||||
"""Execute a web search and return normalized results."""
|
|
||||||
|
|
||||||
|
|
||||||
class WebExtractProvider(ABC):
|
|
||||||
"""Interface for web content extraction backends.
|
|
||||||
|
|
||||||
Implementations live in sibling modules. The user selects a provider
|
|
||||||
via ``hermes tools``; the choice is persisted as
|
|
||||||
``config["web"]["extract_backend"]`` (falling back to
|
|
||||||
``config["web"]["backend"]``).
|
|
||||||
|
|
||||||
Extract providers return results in a normalized format::
|
|
||||||
|
|
||||||
{
|
|
||||||
"success": True,
|
|
||||||
"data": [
|
|
||||||
{"url": str, "title": str, "content": str,
|
|
||||||
"raw_content": str, "metadata": dict},
|
|
||||||
...
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
On failure::
|
|
||||||
|
|
||||||
{"success": False, "error": str}
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def provider_name(self) -> str:
|
|
||||||
"""Short, human-readable name shown in logs and diagnostics."""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def is_configured(self) -> bool:
|
|
||||||
"""Return True when all required env vars / credentials are present.
|
|
||||||
|
|
||||||
Called at tool-registration time to gate availability.
|
|
||||||
Must be cheap — no network calls.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]:
|
|
||||||
"""Extract content from the given URLs and return normalized results."""
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue