mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
Introduce the foundation for independently selecting web search and extract backends — enabling future combinations like SearXNG for search + Firecrawl for extract.

Architecture:
- tools/web_providers/base.py: WebSearchProvider and WebExtractProvider ABCs with normalized result contracts (mirrors CloudBrowserProvider)
- tools/web_tools.py: _get_search_backend() and _get_extract_backend() read per-capability config keys, fall through to shared web.backend
- hermes_cli/config.py: web.search_backend and web.extract_backend in DEFAULT_CONFIG (empty = inherit from web.backend)

Behavioral change:
- web_search_tool() now dispatches via _get_search_backend()
- web_extract_tool() now dispatches via _get_extract_backend()
- When per-capability keys are empty (default), behavior is identical to before — _get_search_backend() falls through to _get_backend()

This is purely structural — no new backends are added. SearXNG and other search-only/extract-only providers can now be added as simple drop-in modules in follow-up PRs.

12 new tests, 49 existing tests pass with zero regressions.

Ref: #19198
89 lines
2.6 KiB
Python
89 lines
2.6 KiB
Python
"""Abstract base classes for web capability providers."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from abc import ABC, abstractmethod
|
|
from typing import Any, Dict, List
|
|
|
|
|
|
class WebSearchProvider(ABC):
    """Contract every web *search* backend must satisfy (Firecrawl, Tavily, Exa, ...).

    Concrete providers live in sibling modules.  The active provider is
    chosen by the user through ``hermes tools`` and stored under
    ``config["web"]["search_backend"]``; when that key is unset the shared
    ``config["web"]["backend"]`` value is used instead.

    A successful search is reported in this normalized shape::

        {
            "success": True,
            "data": {
                "web": [
                    {"title": str, "url": str, "description": str, "position": int},
                    ...
                ]
            }
        }

    A failed search is reported as::

        {"success": False, "error": str}
    """

    @abstractmethod
    def provider_name(self) -> str:
        """Return the short, human-readable name used in logs and diagnostics."""

    @abstractmethod
    def is_configured(self) -> bool:
        """Report whether every required env var / credential is available.

        This runs at tool-registration time to decide whether the provider
        is offered at all, so implementations must keep it cheap and must
        not perform network calls.

        Returns:
            ``True`` when the provider has everything it needs, else ``False``.
        """

    @abstractmethod
    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Run a web search and return the result in the normalized format.

        Args:
            query: The search string.
            limit: Presumably the maximum number of results to return
                (defaults to 5); this interface does not specify how an
                implementation enforces it.

        Returns:
            A dict in the success or failure shape described in the class
            docstring.
        """
|
|
|
|
|
|
class WebExtractProvider(ABC):
    """Contract every web *content extraction* backend must satisfy.

    Concrete providers live in sibling modules.  The active provider is
    chosen by the user through ``hermes tools`` and stored under
    ``config["web"]["extract_backend"]``; when that key is unset the shared
    ``config["web"]["backend"]`` value is used instead.

    A successful extraction is reported in this normalized shape::

        {
            "success": True,
            "data": [
                {"url": str, "title": str, "content": str,
                 "raw_content": str, "metadata": dict},
                ...
            ]
        }

    A failed extraction is reported as::

        {"success": False, "error": str}
    """

    @abstractmethod
    def provider_name(self) -> str:
        """Return the short, human-readable name used in logs and diagnostics."""

    @abstractmethod
    def is_configured(self) -> bool:
        """Report whether every required env var / credential is available.

        This runs at tool-registration time to decide whether the provider
        is offered at all, so implementations must keep it cheap and must
        not perform network calls.

        Returns:
            ``True`` when the provider has everything it needs, else ``False``.
        """

    @abstractmethod
    def extract(self, urls: List[str], **kwargs: Any) -> Dict[str, Any]:
        """Extract content from *urls* and return it in the normalized format.

        Args:
            urls: The URLs to extract content from.
            **kwargs: Additional keyword options; this interface defines
                none, so their meaning is up to each implementation.

        Returns:
            A dict in the success or failure shape described in the class
            docstring.
        """
|