mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
refactor(web): per-capability backend selection for search/extract split
Introduce the foundation for independently selecting web search and extract backends — enabling future combinations like SearXNG for search + Firecrawl for extract.

Architecture:
- tools/web_providers/base.py: WebSearchProvider and WebExtractProvider ABCs with normalized result contracts (mirrors CloudBrowserProvider)
- tools/web_tools.py: _get_search_backend() and _get_extract_backend() read per-capability config keys, fall through to shared web.backend
- hermes_cli/config.py: web.search_backend and web.extract_backend in DEFAULT_CONFIG (empty = inherit from web.backend)

Behavioral change:
- web_search_tool() now dispatches via _get_search_backend()
- web_extract_tool() now dispatches via _get_extract_backend()
- When per-capability keys are empty (default), behavior is identical to before — _get_search_backend() falls through to _get_backend()

This is purely structural — no new backends are added. SearXNG and other search-only/extract-only providers can now be added as simple drop-in modules in follow-up PRs.

12 new tests, 49 existing tests pass with zero regressions.

Ref: #19198
This commit is contained in:
parent
6388aafbd6
commit
cd2cbc73b7
6 changed files with 411 additions and 5 deletions
|
|
@ -119,7 +119,7 @@ def _load_web_config() -> dict:
|
|||
return {}
|
||||
|
||||
def _get_backend() -> str:
|
||||
"""Determine which web backend to use.
|
||||
"""Determine which web backend to use (shared fallback).
|
||||
|
||||
Reads ``web.backend`` from config.yaml (set by ``hermes tools``).
|
||||
Falls back to whichever API key is present for users who configured
|
||||
|
|
@ -145,6 +145,44 @@ def _get_backend() -> str:
|
|||
return "firecrawl" # default (backward compat)
|
||||
|
||||
|
||||
def _get_search_backend() -> str:
    """Return the name of the backend that ``web_search`` should dispatch to.

    Thin wrapper around ``_get_capability_backend`` for the ``"search"``
    capability. Resolution order:

      1. ``web.search_backend`` — the search-specific override, used only
         when it is set and reported available
      2. otherwise whatever the shared ``_get_backend()`` selects
         (``web.backend``, then its own fallback logic)

    Keeping search resolution separate from extract allows mixing
    providers, e.g. SearXNG for search + Firecrawl for extract.
    """
    capability = "search"
    return _get_capability_backend(capability)
|
||||
|
||||
|
||||
def _get_extract_backend() -> str:
    """Return the name of the backend that ``web_extract`` should dispatch to.

    Thin wrapper around ``_get_capability_backend`` for the ``"extract"``
    capability. Resolution order:

      1. ``web.extract_backend`` — the extract-specific override, used only
         when it is set and reported available
      2. otherwise whatever the shared ``_get_backend()`` selects
         (``web.backend``, then its own fallback logic)
    """
    capability = "extract"
    return _get_capability_backend(capability)
|
||||
|
||||
|
||||
def _get_capability_backend(capability: str) -> str:
    """Resolve the backend for a single web capability.

    Looks up the ``{capability}_backend`` key (e.g. ``search_backend``) in
    the web config. The value is normalised to lower case with surrounding
    whitespace removed before use.

    Args:
        capability: Capability prefix used to build the config key, such as
            ``"search"`` or ``"extract"``.

    Returns:
        The per-capability backend name when one is configured and
        ``_is_backend_available`` accepts it; otherwise the result of the
        shared ``_get_backend()``.
    """
    override = _load_web_config().get(f"{capability}_backend") or ""
    override = override.lower().strip()

    # An unset override, or one that is configured but not currently
    # usable, is ignored in favour of the shared backend selection.
    if not override or not _is_backend_available(override):
        return _get_backend()
    return override
|
||||
|
||||
|
||||
def _is_backend_available(backend: str) -> bool:
|
||||
"""Return True when the selected backend is currently usable."""
|
||||
if backend == "exa":
|
||||
|
|
@ -1129,8 +1167,8 @@ def web_search_tool(query: str, limit: int = 5) -> str:
|
|||
if is_interrupted():
|
||||
return tool_error("Interrupted", success=False)
|
||||
|
||||
# Dispatch to the configured backend
|
||||
backend = _get_backend()
|
||||
# Dispatch to the configured search backend
|
||||
backend = _get_search_backend()
|
||||
if backend == "parallel":
|
||||
response_data = _parallel_search(query, limit)
|
||||
debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
|
||||
|
|
@ -1286,7 +1324,7 @@ async def web_extract_tool(
|
|||
if not safe_urls:
|
||||
results = []
|
||||
else:
|
||||
backend = _get_backend()
|
||||
backend = _get_extract_backend()
|
||||
|
||||
if backend == "parallel":
|
||||
results = await _parallel_extract(safe_urls)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue