mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
feat(web): exa plugin — first multi-capability migration (search + extract)
Migrates Exa from the inline `_exa_search()` / `_exa_extract()` helpers in
tools/web_tools.py to a bundled plugin at plugins/web/exa/.
This is the first plugin in this PR to advertise supports_extract=True,
exercising the multi-capability ABC path that the initial three migrations
(brave_free, ddgs, searxng — all search-only) did not cover.
Both Exa methods are sync — the SDK is sync-only. The web_extract_tool
dispatcher in tools/web_tools.py will continue to call them inline until
Task "dispatch-extract-all" cuts it over to the registry.
Behaviour preserved bit-for-bit aside from the ABC method-name change:
- is_configured() -> is_available()
- provider_name() -> name (property)
- "exa" stays as the registered name
- Module-level `_exa_client` cache + lazy `from exa_py import Exa`
preserved at the new location.
- Errors (ValueError for missing API key, ImportError for missing SDK,
generic Exception) caught and surfaced as {"success": False, "error": ...}
instead of raising.
Adds "exa" to _WEB_PLUGIN_SKIPLIST in hermes_cli/tools_config.py so the
hardcoded TOOL_CATEGORIES["web"] row and the plugin-injected row don't
duplicate during the spike. The skip-list goes away in the cleanup phase
along with the hardcoded row.
The legacy inline `_exa_search` / `_exa_extract` / `_get_exa_client` /
`_exa_client` in tools/web_tools.py are NOT deleted yet — the dispatcher
still references them. They go away in the next dispatcher-cutover commit.
E2E verified:
- Plugin discovers + registers
- .supports_search/.supports_extract/.supports_crawl = (True, True, False)
- .get_setup_schema() returns the picker row shape
- resolve(): explicit exa + EXA_API_KEY -> exa; without key -> exa (registered
but unavailable, dispatcher surfaces "EXA_API_KEY not set" error)
This commit is contained in:
parent
e3f0a88891
commit
ec8449e9c6
4 changed files with 231 additions and 1 deletions
15
plugins/web/exa/__init__.py
Normal file
15
plugins/web/exa/__init__.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
"""Exa web search + extract plugin — bundled, auto-loaded.
|
||||
|
||||
Backed by the official Exa SDK (``exa-py``). Both search and extract are
|
||||
sync; the dispatcher in :mod:`tools.web_tools` wraps the calls when the
|
||||
caller is async.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from plugins.web.exa.provider import ExaWebSearchProvider
|
||||
|
||||
|
||||
def register(ctx) -> None:
    """Plugin entry point: hand a fresh Exa provider to the plugin context.

    ``ctx`` is the plugin context object supplied by the loader; the only
    thing used here is its ``register_web_search_provider`` method.
    """
    provider = ExaWebSearchProvider()
    ctx.register_web_search_provider(provider)
|
||||
7
plugins/web/exa/plugin.yaml
Normal file
7
plugins/web/exa/plugin.yaml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
name: web-exa
|
||||
version: 1.0.0
|
||||
description: "Exa web search and content extraction. Requires EXA_API_KEY — sign up at https://exa.ai."
|
||||
author: NousResearch
|
||||
kind: backend
|
||||
provides_web_providers:
|
||||
- exa
|
||||
208
plugins/web/exa/provider.py
Normal file
208
plugins/web/exa/provider.py
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
"""Exa web search + content extraction — plugin form.
|
||||
|
||||
Subclasses :class:`agent.web_search_provider.WebSearchProvider`. Uses the
|
||||
official Exa SDK (``exa-py``) which is lazy-loaded via
|
||||
:func:`tools.lazy_deps.ensure` so that cold-start CLI users don't pay the
|
||||
SDK import cost when Exa isn't configured.
|
||||
|
||||
Config keys this provider responds to::
|
||||
|
||||
web:
|
||||
search_backend: "exa" # explicit per-capability
|
||||
extract_backend: "exa" # explicit per-capability
|
||||
backend: "exa" # shared fallback for both
|
||||
|
||||
Env var::
|
||||
|
||||
EXA_API_KEY=... # https://exa.ai (paid tier; free trial available)
|
||||
|
||||
The previous in-tree implementation lived at
|
||||
``tools.web_tools._exa_search`` / ``_exa_extract``; this file is the
|
||||
canonical replacement. Behavior is bit-for-bit identical aside from the
|
||||
ABC method-name change.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from agent.web_search_provider import WebSearchProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Module-level cache for the Exa client so we don't reconstruct it per
|
||||
# call. Matches the legacy `_exa_client` pattern in tools/web_tools.py.
|
||||
_exa_client: Any = None
|
||||
|
||||
|
||||
def _get_exa_client() -> Any:
    """Return the module-wide Exa SDK client, building it on first use.

    The client is stored in the module-level ``_exa_client`` variable so
    later calls reuse it. Based on the legacy helper of the same name in
    ``tools/web_tools.py``, with one ordering change: the API key is
    validated *before* the SDK is ensured/imported, so an unconfigured
    install never pays the SDK import cost and always gets the
    "key not set" error rather than an SDK-missing one.

    Returns:
        The cached ``exa_py.Exa`` instance.

    Raises:
        ValueError: ``EXA_API_KEY`` is unset, empty, or whitespace-only
            (the same rule ``ExaWebSearchProvider.is_available`` applies).
        ImportError: ``exa_py`` cannot be imported, or the lazy-deps
            ``ensure`` call failed with a non-import error (re-raised as
            ``ImportError`` with the original exception chained).

    ``ExaWebSearchProvider.search`` / ``.extract`` in this module catch both
    exception types and convert them into error payloads.
    """
    global _exa_client

    if _exa_client is not None:
        return _exa_client

    # Check configuration first: it is cheap, and it keeps the SDK import
    # (and any lazy-deps work) off the path when Exa is not configured.
    # Stripping matches is_available(), so a blank key is rejected here too
    # and stray whitespace/newlines never reach the SDK.
    api_key = os.getenv("EXA_API_KEY", "").strip()
    if not api_key:
        raise ValueError(
            "EXA_API_KEY environment variable not set. "
            "Get your API key at https://exa.ai"
        )

    try:
        from tools.lazy_deps import ensure as _lazy_ensure

        _lazy_ensure("search.exa", prompt=False)
    except ImportError:
        # Best effort: with no lazy_deps helper (or an ImportError from it)
        # the direct SDK import below is what decides success or failure.
        pass
    except Exception as exc:  # noqa: BLE001 — lazy_deps surfaces install hints
        # Normalise to ImportError so callers only handle one "SDK missing"
        # type; chain the cause so the original traceback is kept.
        raise ImportError(str(exc)) from exc

    from exa_py import Exa  # noqa: WPS433 — deliberately lazy

    client = Exa(api_key=api_key)
    client.headers["x-exa-integration"] = "hermes-agent"

    # Publish to the cache only once the client is fully configured, so a
    # failure above cannot leave a half-initialised client cached.
    _exa_client = client
    return client
|
||||
|
||||
|
||||
def _reset_client_for_tests() -> None:
    """Forget the cached Exa client (test helper).

    Sets the module-level ``_exa_client`` back to ``None`` so the next
    ``_get_exa_client()`` call constructs a new client.
    """
    # Rebind the module global directly in this module's namespace.
    globals()["_exa_client"] = None
|
||||
|
||||
|
||||
class ExaWebSearchProvider(WebSearchProvider):
    """Exa-backed provider that advertises both search and extract.

    Both operations are plain synchronous calls on the client returned by
    ``_get_exa_client()``. Neither method raises for the failures it
    handles: a missing API key, a missing SDK, and any other exception are
    reported inside the return value instead.
    """

    @property
    def name(self) -> str:
        """Identifier this provider is registered under."""
        return "exa"

    @property
    def display_name(self) -> str:
        """Human-readable provider label."""
        return "Exa"

    def is_available(self) -> bool:
        """Return True when ``EXA_API_KEY`` is set to a non-blank value."""
        return bool(os.getenv("EXA_API_KEY", "").strip())

    def supports_search(self) -> bool:
        """Exa implements :meth:`search`."""
        return True

    def supports_extract(self) -> bool:
        """Exa implements :meth:`extract`."""
        return True

    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Run an Exa search and normalise the hits.

        Args:
            query: Search string passed to the SDK unchanged.
            limit: Forwarded to the SDK as ``num_results``.

        Returns:
            ``{"success": True, "data": {"web": [...]}}`` on success, where
            each entry has ``url``, ``title``, ``description`` (the result's
            highlights joined with spaces, ``""`` if none) and a 1-based
            ``position``. On failure, ``{"success": False, "error": str}``;
            this covers interruption, a missing API key, import failures
            and any other exception.
        """
        try:
            from tools.interrupt import is_interrupted

            if is_interrupted():
                return {"success": False, "error": "Interrupted"}

            logger.info("Exa search: '%s' (limit=%d)", query, limit)
            response = _get_exa_client().search(
                query,
                num_results=limit,
                contents={"highlights": True},
            )

            web_results = [
                {
                    "url": result.url or "",
                    "title": result.title or "",
                    "description": " ".join(result.highlights or []),
                    "position": position,
                }
                for position, result in enumerate(response.results or [], start=1)
            ]
            return {"success": True, "data": {"web": web_results}}
        except ValueError as exc:
            # Raised by _get_exa_client when EXA_API_KEY is missing; the
            # message is already user-facing, so pass it through as-is.
            return {"success": False, "error": str(exc)}
        except ImportError as exc:
            return {"success": False, "error": f"Exa SDK not installed: {exc}"}
        except Exception as exc:  # noqa: BLE001 — surface as failure
            logger.warning("Exa search error: %s", exc)
            return {"success": False, "error": f"Exa search failed: {exc}"}

    @staticmethod
    def _extract_failures(urls: List[str], message: str) -> List[Dict[str, Any]]:
        """Build one uniform error entry per requested URL."""
        return [
            {"url": url, "title": "", "content": "", "error": message}
            for url in urls
        ]

    def extract(self, urls: List[str], **kwargs: Any) -> List[Dict[str, Any]]:
        """Fetch page text for ``urls`` through Exa's ``get_contents``.

        Args:
            urls: URLs to fetch. An empty (or ``None``) value returns ``[]``
                immediately, without building a client or calling the API.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            On success, one dict per result *returned by Exa* with ``url``,
            ``title``, ``content``, ``raw_content`` (same text as
            ``content``) and ``metadata``. A requested URL that Exa does not
            return a result for has no entry in the output.
            If the whole call fails (interruption, missing API key, import
            failure, or any other exception), one dict per *requested* URL
            with empty ``title``/``content`` and an ``error`` message.
        """
        if not urls:
            # Nothing to fetch: every path below would yield [] anyway, so
            # skip the client construction and network round-trip.
            return []

        try:
            from tools.interrupt import is_interrupted

            if is_interrupted():
                # Same shape as the other failure payloads (incl. "content").
                return self._extract_failures(urls, "Interrupted")

            logger.info("Exa extract: %d URL(s)", len(urls))
            response = _get_exa_client().get_contents(urls, text=True)

            results: List[Dict[str, Any]] = []
            for result in response.results or []:
                content = result.text or ""
                url = result.url or ""
                title = result.title or ""
                results.append(
                    {
                        "url": url,
                        "title": title,
                        "content": content,
                        "raw_content": content,
                        "metadata": {"sourceURL": url, "title": title},
                    }
                )
            return results
        except ValueError as exc:
            # Missing API key (from _get_exa_client): message is user-facing.
            return self._extract_failures(urls, str(exc))
        except ImportError as exc:
            return self._extract_failures(urls, f"Exa SDK not installed: {exc}")
        except Exception as exc:  # noqa: BLE001
            logger.warning("Exa extract error: %s", exc)
            return self._extract_failures(urls, f"Exa extract failed: {exc}")

    def get_setup_schema(self) -> Dict[str, Any]:
        """Return the setup-picker row: label, badge, tagline and env vars."""
        return {
            "name": "Exa",
            "badge": "paid",
            "tag": "Semantic + neural web search with content extraction.",
            "env_vars": [
                {
                    "key": "EXA_API_KEY",
                    "prompt": "Exa API key",
                    "url": "https://exa.ai",
                },
            ],
        }
|
||||
Loading…
Add table
Add a link
Reference in a new issue