feat(web): exa plugin — first multi-capability migration (search + extract)

Migrates Exa from the inline `_exa_search()` / `_exa_extract()` helpers in
tools/web_tools.py to a bundled plugin at plugins/web/exa/.

This is the first plugin in this PR to advertise supports_extract=True,
exercising the multi-capability ABC path that the initial three migrations
(brave_free, ddgs, searxng — all search-only) did not cover.

Both Exa methods are sync — the SDK is sync-only. The web_extract_tool
dispatcher in tools/web_tools.py will continue to call them inline until
Task "dispatch-extract-all" cuts it over to the registry.

Behaviour preserved bit-for-bit aside from the ABC method-name change:
  - is_configured()  -> is_available()
  - provider_name()  -> name (property)
  - "exa" stays as the registered name
  - Module-level `_exa_client` cache + lazy `from exa_py import Exa`
    preserved at the new location.
  - Errors (ValueError for missing API key, ImportError for missing SDK,
    generic Exception) caught and surfaced as {"success": False, "error": ...}
    instead of raising.

Adds "exa" to _WEB_PLUGIN_SKIPLIST in hermes_cli/tools_config.py so the
hardcoded TOOL_CATEGORIES["web"] row and the plugin-injected row don't
duplicate during the spike. The skip-list goes away in the cleanup phase
along with the hardcoded row.

The legacy inline `_exa_search` / `_exa_extract` / `_get_exa_client` /
`_exa_client` in tools/web_tools.py are NOT deleted yet — the dispatcher
still references them. They go away in the next dispatcher-cutover commit.

E2E verified:
  - Plugin discovers + registers
  - .supports_search/.supports_extract/.supports_crawl = (True, True, False)
  - .get_setup_schema() returns the picker row shape
  - resolve(): explicit exa + EXA_API_KEY -> exa; without key -> exa (registered
    but unavailable, dispatcher surfaces "EXA_API_KEY not set" error)
This commit is contained in:
kshitijk4poor 2026-05-14 00:11:58 +05:30 committed by Teknium
parent e3f0a88891
commit ec8449e9c6
4 changed files with 231 additions and 1 deletions

View file

@ -0,0 +1,15 @@
"""Exa web search + extract plugin — bundled, auto-loaded.
Backed by the official Exa SDK (``exa-py``). Both search and extract are
sync; the dispatcher in :mod:`tools.web_tools` handles the wrap when the
caller is async.
"""
from __future__ import annotations
from plugins.web.exa.provider import ExaWebSearchProvider
def register(ctx) -> None:
    """Plugin entry point: hand a fresh Exa provider to the plugin context.

    ``ctx`` is the object supplied by the plugin loader; the only thing
    used here is its ``register_web_search_provider`` method.
    """
    exa_provider = ExaWebSearchProvider()
    ctx.register_web_search_provider(exa_provider)

View file

@ -0,0 +1,7 @@
name: web-exa
version: 1.0.0
description: "Exa web search and content extraction. Requires EXA_API_KEY — sign up at https://exa.ai."
author: NousResearch
kind: backend
provides_web_providers:
- exa

208
plugins/web/exa/provider.py Normal file
View file

@ -0,0 +1,208 @@
"""Exa web search + content extraction — plugin form.
Subclasses :class:`agent.web_search_provider.WebSearchProvider`. Uses the
official Exa SDK (``exa-py``) which is lazy-loaded via
:func:`tools.lazy_deps.ensure` so that cold-start CLI users don't pay the
SDK import cost when Exa isn't configured.
Config keys this provider responds to::

    web:
      search_backend: "exa"     # explicit per-capability
      extract_backend: "exa"    # explicit per-capability
      backend: "exa"            # shared fallback for both

Env var::

    EXA_API_KEY=...             # https://exa.ai (paid tier; free trial available)

The previous in-tree implementation lived at
``tools.web_tools._exa_search`` / ``_exa_extract``; this file is the
canonical replacement. Behavior is bit-for-bit identical aside from the
ABC method-name change.
"""
from __future__ import annotations
import logging
import os
from typing import Any, Dict, List
from agent.web_search_provider import WebSearchProvider
logger = logging.getLogger(__name__)
# Module-level cache for the Exa client so we don't reconstruct it per
# call. Matches the legacy `_exa_client` pattern in tools/web_tools.py.
_exa_client: Any = None
def _get_exa_client() -> Any:
    """Return the module-wide Exa SDK client, building it on first use.

    The client is cached in the module-level ``_exa_client`` so repeated
    calls reuse one instance. ``exa_py`` is imported lazily, after an
    optional ``tools.lazy_deps.ensure("search.exa", prompt=False)`` call.

    The API key is validated *before* any dependency work so that an
    unconfigured install reports the missing key rather than an SDK
    problem, and no lazy-dependency step runs for a provider that cannot
    be used anyway. Surrounding whitespace is stripped, matching
    ``ExaWebSearchProvider.is_available``.

    Returns:
        The cached Exa client, tagged with the ``x-exa-integration`` header.

    Raises:
        ValueError: ``EXA_API_KEY`` is unset, empty, or whitespace-only.
        ImportError: ``exa_py`` cannot be imported, or the lazy-deps
            ``ensure`` call failed with a non-import error (re-raised as
            ``ImportError`` with the original exception chained).
    """
    global _exa_client
    if _exa_client is not None:
        return _exa_client

    api_key = (os.getenv("EXA_API_KEY") or "").strip()
    if not api_key:
        raise ValueError(
            "EXA_API_KEY environment variable not set. "
            "Get your API key at https://exa.ai"
        )

    try:
        from tools.lazy_deps import ensure as _lazy_ensure

        _lazy_ensure("search.exa", prompt=False)
    except ImportError:
        # Best effort: if the helper (or something it imports) is missing,
        # fall through and let the exa_py import below raise the real error.
        pass
    except Exception as exc:  # noqa: BLE001 — lazy_deps surfaces install hints
        # Normalise to ImportError (callers already handle that type) while
        # keeping the original exception available as __cause__.
        raise ImportError(str(exc)) from exc

    from exa_py import Exa  # noqa: WPS433 — deliberately lazy

    # Finish configuring the client before publishing it to the cache, so a
    # failure while setting the header never leaves a half-built client cached.
    client = Exa(api_key=api_key)
    client.headers["x-exa-integration"] = "hermes-agent"
    _exa_client = client
    return client
def _reset_client_for_tests() -> None:
    """Test hook: clear the module-level ``_exa_client`` cache slot."""
    # Rebinding through the module namespace is equivalent to a
    # ``global`` declaration followed by assignment.
    globals()["_exa_client"] = None
class ExaWebSearchProvider(WebSearchProvider):
    """Exa-backed provider offering web search and URL content extraction.

    ``search`` and ``extract`` are plain synchronous methods. Both catch
    ``Exception`` and report failures inside their return value instead of
    raising. Any async wrapping is left to the caller.
    NOTE(review): the dispatcher in ``tools.web_tools`` is said to wrap
    sync calls when needed — that code is not visible here; confirm.
    """

    @property
    def name(self) -> str:
        """Registry identifier for this provider."""
        return "exa"

    @property
    def display_name(self) -> str:
        """Human-readable provider name."""
        return "Exa"

    def is_available(self) -> bool:
        """Return True when ``EXA_API_KEY`` is set to a non-blank value."""
        return bool(os.getenv("EXA_API_KEY", "").strip())

    def supports_search(self) -> bool:
        """This provider implements :meth:`search`."""
        return True

    def supports_extract(self) -> bool:
        """This provider implements :meth:`extract`."""
        return True

    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Run an Exa search and normalise the hits.

        Args:
            query: Search string passed to the SDK unchanged.
            limit: Forwarded to the SDK as ``num_results``.

        Returns:
            ``{"success": True, "data": {"web": [...]}}`` on success, where
            each entry has ``url``, ``title``, ``description`` (the hit's
            highlights joined with single spaces) and a 1-based
            ``position``. On failure — interruption, missing API key,
            missing SDK, or any other exception — returns
            ``{"success": False, "error": <message>}``.
        """
        try:
            from tools.interrupt import is_interrupted

            if is_interrupted():
                return {"success": False, "error": "Interrupted"}

            logger.info("Exa search: '%s' (limit=%d)", query, limit)
            response = _get_exa_client().search(
                query,
                num_results=limit,
                contents={"highlights": True},
            )

            # Missing attributes come back falsy; coerce each to "" / [] so
            # every field in the payload is a string.
            web_results = [
                {
                    "url": hit.url or "",
                    "title": hit.title or "",
                    "description": " ".join(hit.highlights or []),
                    "position": rank,
                }
                for rank, hit in enumerate(response.results or [], start=1)
            ]
            return {"success": True, "data": {"web": web_results}}
        except ValueError as exc:
            # Raised by _get_exa_client when EXA_API_KEY is missing.
            return {"success": False, "error": str(exc)}
        except ImportError as exc:
            return {"success": False, "error": f"Exa SDK not installed: {exc}"}
        except Exception as exc:  # noqa: BLE001 — surface as failure
            logger.warning("Exa search error: %s", exc)
            return {"success": False, "error": f"Exa search failed: {exc}"}

    @staticmethod
    def _error_rows(urls: List[str], message: str) -> List[Dict[str, Any]]:
        """Build one uniformly shaped error row per requested URL."""
        return [
            {"url": u, "title": "", "content": "", "error": message}
            for u in urls
        ]

    def extract(self, urls: List[str], **kwargs: Any) -> List[Dict[str, Any]]:
        """Fetch page text for ``urls`` through Exa's ``get_contents``.

        Args:
            urls: URLs to fetch. An empty (or ``None``) value returns ``[]``
                without building the client or making a request.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            On success, one dict per result the SDK returned, with keys
            ``url``, ``title``, ``content``, ``raw_content`` (same text as
            ``content``) and ``metadata``. When the whole call fails or is
            interrupted, one row per *requested* URL with empty ``title`` /
            ``content`` and an ``error`` message. Never raises for
            ``Exception`` subclasses.
        """
        if not urls:
            # Nothing to fetch: every path below would yield [] anyway, and
            # the error handlers iterate ``urls`` (which would fail on None).
            return []

        try:
            from tools.interrupt import is_interrupted

            if is_interrupted():
                return self._error_rows(urls, "Interrupted")

            logger.info("Exa extract: %d URL(s)", len(urls))
            response = _get_exa_client().get_contents(urls, text=True)

            results: List[Dict[str, Any]] = []
            for page in response.results or []:
                content = page.text or ""
                url = page.url or ""
                title = page.title or ""
                results.append(
                    {
                        "url": url,
                        "title": title,
                        "content": content,
                        "raw_content": content,
                        "metadata": {"sourceURL": url, "title": title},
                    }
                )
            return results
        except ValueError as exc:
            # Raised by _get_exa_client when EXA_API_KEY is missing.
            return self._error_rows(urls, str(exc))
        except ImportError as exc:
            return self._error_rows(urls, f"Exa SDK not installed: {exc}")
        except Exception as exc:  # noqa: BLE001 — surface as failure
            logger.warning("Exa extract error: %s", exc)
            return self._error_rows(urls, f"Exa extract failed: {exc}")

    def get_setup_schema(self) -> Dict[str, Any]:
        """Return static setup metadata: label, badge, tagline and env vars.

        NOTE(review): presumably consumed by the tools setup picker —
        the consumer is not visible in this file.
        """
        return {
            "name": "Exa",
            "badge": "paid",
            "tag": "Semantic + neural web search with content extraction.",
            "env_vars": [
                {
                    "key": "EXA_API_KEY",
                    "prompt": "Exa API key",
                    "url": "https://exa.ai",
                },
            ],
        }