mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
Removes the legacy in-tree provider scaffolding that PR #25182 fully replaced with the plugin architecture: tools/web_providers/__init__.py (6 lines) tools/web_providers/base.py (89 lines — old ABCs) tools/web_providers/ARCHITECTURE.md (73 lines — old design doc) These were the staging-ground ABCs and provider modules that the plugin migration absorbed. All seven web providers now implement the single :class:`agent.web_search_provider.WebSearchProvider` ABC and live under ``plugins/web/<vendor>/``. Nothing else in the tree imports ``tools.web_providers`` — verified via grep before deletion. Test migration (tests/tools/test_web_providers.py) -------------------------------------------------- Rewrote ``TestWebProviderABCs`` to test the new unified ABC at :mod:`agent.web_search_provider`: - test_cannot_instantiate_abc_directly — abstract ``name`` + ``is_available`` - test_concrete_search_only_provider_works — exercise default ``supports_extract=False`` / ``supports_crawl=False`` flags - test_concrete_multi_capability_provider_works — exercise all three capabilities, async extract supported (declared sync here for simplicity; real plugins like parallel + firecrawl use async) - test_search_only_provider_skips_extract_and_crawl — verify ``supports_*()`` flags default to False so search-only providers don't have to implement extract() or crawl() The 9 other tests in the file (per-capability backend selection, DEFAULT_CONFIG merge, dispatcher routing) test public helpers in ``tools.web_tools`` that still exist and pass unchanged. agent/web_search_provider.py docstring updated to reflect that the legacy ABCs no longer exist; the response-shape contract is preserved bit-for-bit so external consumers see no behavioral change. 
Net diff -------- - tools/web_providers/ removed (-168 lines) - tests/tools/test_web_providers.py rewritten ABC section (+78/-30 net, same coverage, new API) - agent/web_search_provider.py docstring (-3/+5 lines) Verified -------- - 173/173 targeted web tests pass - 12/12 ABC contract tests pass with the new interface - No remaining grep hits for ``tools.web_providers`` outside of intentional historical references in plugin docstrings.
220 lines
7.8 KiB
Python
220 lines
7.8 KiB
Python
"""
|
|
Web Search Provider ABC
|
|
=======================
|
|
|
|
Defines the pluggable-backend interface for web search and content extraction.
|
|
Providers register instances via ``PluginContext.register_web_search_provider()``;
|
|
the active one (selected via ``web.search_backend`` / ``web.extract_backend`` /
|
|
``web.backend`` in ``config.yaml``) services every ``web_search`` /
|
|
``web_extract`` tool call.
|
|
|
|
Providers live in ``<repo>/plugins/web/<name>/`` (built-in, auto-loaded as
|
|
``kind: backend``) or ``~/.hermes/plugins/web/<name>/`` (user, opt-in via
|
|
``plugins.enabled``).
|
|
|
|
This ABC is the SINGLE plugin-facing surface for web providers — every
|
|
provider in the tree (brave-free, ddgs, searxng, exa, parallel, tavily,
|
|
firecrawl) implements it. The legacy in-tree ``tools.web_providers.base``
|
|
ABCs were deleted in PR #25182 along with the per-vendor inline helpers
|
|
in ``tools/web_tools.py``; the response-shape contract documented below
|
|
is preserved bit-for-bit so the tool wrapper does not have to translate.
|
|
|
|
Response shape (preserved from the legacy contract):
|
|
|
|
Search results::
|
|
|
|
{
|
|
"success": True,
|
|
"data": {
|
|
"web": [
|
|
{"title": str, "url": str, "description": str, "position": int},
|
|
...
|
|
]
|
|
}
|
|
}
|
|
|
|
Extract results::
|
|
|
|
{
|
|
"success": True,
|
|
"data": [
|
|
{"url": str, "title": str, "content": str,
|
|
"raw_content": str, "metadata": dict},
|
|
...
|
|
]
|
|
}
|
|
|
|
On failure (either capability)::
|
|
|
|
{"success": False, "error": str}
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import abc
|
|
from typing import Any, Dict, List
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ABC
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class WebSearchProvider(abc.ABC):
    """Common interface every web search / extract / crawl backend implements.

    Concrete providers must supply the abstract :attr:`name` property and
    :meth:`is_available`, and should implement at least one capability
    (:meth:`search`, :meth:`extract`, :meth:`crawl`).  Each capability has a
    matching flag -- :meth:`supports_search`, :meth:`supports_extract`,
    :meth:`supports_crawl` -- which the registry consults to route a tool
    call to a suitable provider.  A multi-capability provider (SearXNG,
    Firecrawl, Tavily, ...) simply advertises more than one flag.

    Defaults defined here: search is advertised (``True``) while extract
    and crawl are not (``False``); all three capability methods raise
    :class:`NotImplementedError` until overridden.
    """

    # -- identity -----------------------------------------------------------

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """Short, stable identifier for this provider.

        This is the value users put in the ``web.search_backend`` /
        ``web.extract_backend`` / ``web.backend`` config keys.  Keep it
        lowercase with no spaces; hyphens are allowed so existing
        user-visible names keep working (``brave-free``, ``ddgs``,
        ``searxng``, ``firecrawl``).
        """

    @property
    def display_name(self) -> str:
        """Label shown to humans in ``hermes tools``; falls back to ``name``."""
        label = self.name
        return label

    # -- availability and capability flags ----------------------------------

    @abc.abstractmethod
    def is_available(self) -> bool:
        """Report whether this provider is currently able to handle calls.

        Implementations are expected to be cheap: check for an env var, an
        importable optional dependency, a configured instance URL.  They
        must NOT touch the network, because this is evaluated at
        tool-registration time and again on every ``hermes tools`` paint.
        """

    def supports_search(self) -> bool:
        """Tell the registry whether :meth:`search` is implemented.

        Defaults to ``True``; a provider without search must override this.
        """
        return True

    def supports_extract(self) -> bool:
        """Tell the registry whether :meth:`extract` is implemented.

        Defaults to ``False``.  :meth:`extract` may be written either sync
        or async: the dispatcher checks with
        :func:`inspect.iscoroutinefunction` and awaits when required.  A
        sync implementation doing blocking I/O (HTTP, SDK calls) is ideally
        wrapped in :func:`asyncio.to_thread` at the call site, but small
        providers may stay sync and leave threading to the dispatcher.
        """
        return False

    def supports_crawl(self) -> bool:
        """Tell the registry whether :meth:`crawl` is implemented.

        Defaults to ``False``.  Crawling is distinct from extraction: the
        agent hands over a single *seed URL* and the provider follows
        links itself, which helps on documentation sites where the agent
        cannot enumerate the relevant URLs in advance.  Tavily is
        currently the only built-in backend with native crawling;
        Firecrawl has a comparable feature that is not surfaced as a tool
        today.

        When this stays ``False`` the dispatcher in
        :func:`tools.web_tools.web_crawl_tool` falls back to its
        auxiliary-model summarization path.
        """
        return False

    # -- capabilities (raise until a subclass overrides them) ---------------

    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Run a web search for ``query`` returning up to ``limit`` results.

        Providers whose :meth:`supports_search` returns True override
        this.  The base implementation always raises, so callers should
        check :meth:`supports_search` first.

        Raises:
            NotImplementedError: always, in this default implementation.
        """
        message = f"{self.name} does not support search (override supports_search)"
        raise NotImplementedError(message)

    def extract(self, urls: List[str], **kwargs: Any) -> Any:
        """Fetch and extract the content of each URL in ``urls``.

        Providers whose :meth:`supports_extract` returns True override
        this.  The base implementation always raises, so callers should
        check :meth:`supports_extract` first.

        An override returns a list of per-URL result dicts in the shape
        the legacy :func:`tools.web_tools.web_extract_tool`
        post-processing pipeline expects::

            [
                {
                    "url": str,
                    "title": str,
                    "content": str,
                    "raw_content": str,
                    "metadata": dict,  # optional
                    "error": str,      # optional, only on per-URL failure
                },
                ...
            ]

        Overrides MAY be ``async def``; the dispatcher detects coroutine
        functions via :func:`inspect.iscoroutinefunction` and awaits them.

        ``kwargs`` may carry forward-compat fields (``format``,
        ``include_raw``, ``max_chars``); unknown keys should be ignored.

        Raises:
            NotImplementedError: always, in this default implementation.
        """
        message = f"{self.name} does not support extract (override supports_extract)"
        raise NotImplementedError(message)

    def crawl(self, url: str, **kwargs: Any) -> Any:
        """Crawl outward from the seed ``url`` and return what was found.

        Providers whose :meth:`supports_crawl` returns True override
        this.  The base implementation always raises, so callers should
        check :meth:`supports_crawl` first.

        An override returns ``{"results": [{"url": str, "title": str,
        "content": str, ...}, ...]}``, the shape that
        :func:`tools.web_tools.web_crawl_tool` post-processing expects.

        Overrides MAY be ``async def``.

        ``kwargs`` may carry forward-compat fields (e.g. ``max_depth``,
        ``include_domains``); unknown keys should be ignored.

        Raises:
            NotImplementedError: always, in this default implementation.
        """
        message = f"{self.name} does not support crawl (override supports_crawl)"
        raise NotImplementedError(message)

    # -- setup metadata -----------------------------------------------------

    def get_setup_schema(self) -> Dict[str, Any]:
        """Describe this provider for the ``hermes tools`` picker.

        ``hermes_cli/tools_config.py`` uses the result to add this
        provider as a row in the Web Search / Web Extract picker.  A fully
        populated schema looks like::

            {
                "name": "Brave Search (Free)",
                "badge": "free",
                "tag": "No paid tier needed — uses Brave's free API.",
                "env_vars": [
                    {"key": "BRAVE_SEARCH_API_KEY",
                     "prompt": "Brave Search API key",
                     "url": "https://brave.com/search/api/"},
                ],
            }

        The default is a minimal entry: ``display_name`` as the name,
        empty badge and tag, and no env vars.  Override it to expose API
        key prompts, badges, and instance URL fields.

        Returns:
            A newly built dict on every call.
        """
        schema: Dict[str, Any] = {"name": self.display_name}
        schema["badge"] = ""
        schema["tag"] = ""
        schema["env_vars"] = []
        return schema
|