From ec8449e9c688b1e9cb8d47856e32f0a32a2d391b Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Thu, 14 May 2026 00:11:58 +0530 Subject: [PATCH] =?UTF-8?q?feat(web):=20exa=20plugin=20=E2=80=94=20first?= =?UTF-8?q?=20multi-capability=20migration=20(search=20+=20extract)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrates Exa from the inline `_exa_search()` / `_exa_extract()` helpers in tools/web_tools.py to a bundled plugin at plugins/web/exa/. This is the first plugin in this PR to advertise supports_extract=True, exercising the multi-capability ABC path that the initial three migrations (brave_free, ddgs, searxng — all search-only) did not cover. Both Exa methods are sync — the SDK is sync-only. The web_extract_tool dispatcher in tools/web_tools.py will continue to call them inline until Task "dispatch-extract-all" cuts it over to the registry. Behaviour preserved bit-for-bit aside from the ABC method-name change: - is_configured() -> is_available() - provider_name() -> name (property) - "exa" stays as the registered name - Module-level `_exa_client` cache + lazy `from exa_py import Exa` preserved at the new location. - Errors (ValueError for missing API key, ImportError for missing SDK, generic Exception) caught and surfaced as {"success": False, "error": ...} instead of raising. Adds "exa" to _WEB_PLUGIN_SKIPLIST in hermes_cli/tools_config.py so the hardcoded TOOL_CATEGORIES["web"] row and the plugin-injected row don't duplicate during the spike. The skip-list goes away in the cleanup phase along with the hardcoded row. The legacy inline `_exa_search` / `_exa_extract` / `_get_exa_client` / `_exa_client` in tools/web_tools.py are NOT deleted yet — the dispatcher still references them. They go away in the next dispatcher-cutover commit. 
E2E verified: - Plugin discovers + registers - .supports_search/.supports_extract/.supports_crawl = (True, True, False) - .get_setup_schema() returns the picker row shape - resolve(): explicit exa + EXA_API_KEY -> exa; without key -> exa (registered but unavailable, dispatcher surfaces "EXA_API_KEY not set" error) --- hermes_cli/tools_config.py | 2 +- plugins/web/exa/__init__.py | 15 +++ plugins/web/exa/plugin.yaml | 7 ++ plugins/web/exa/provider.py | 208 ++++++++++++++++++++++++++++++++++++ 4 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 plugins/web/exa/__init__.py create mode 100644 plugins/web/exa/plugin.yaml create mode 100644 plugins/web/exa/provider.py diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index bb357e63d41..94c1b96a06a 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -1586,7 +1586,7 @@ def _plugin_video_gen_providers() -> list[dict]: # removed and this helper becomes the sole source of web-provider picker # rows (matching how Spotify / Google Meet are surfaced today purely from # their plugins). -_WEB_PLUGIN_SKIPLIST = frozenset({"brave-free", "ddgs", "searxng"}) +_WEB_PLUGIN_SKIPLIST = frozenset({"brave-free", "ddgs", "searxng", "exa"}) def _plugin_web_search_providers() -> list[dict]: diff --git a/plugins/web/exa/__init__.py b/plugins/web/exa/__init__.py new file mode 100644 index 00000000000..d2ef3f16cf6 --- /dev/null +++ b/plugins/web/exa/__init__.py @@ -0,0 +1,15 @@ +"""Exa web search + extract plugin — bundled, auto-loaded. + +Backed by the official Exa SDK (``exa-py``). Both search and extract are +sync; the dispatcher in :mod:`tools.web_tools` handles the wrap when the +caller is async. 
+""" + +from __future__ import annotations + +from plugins.web.exa.provider import ExaWebSearchProvider + + +def register(ctx) -> None: + """Register the Exa provider with the plugin context.""" + ctx.register_web_search_provider(ExaWebSearchProvider()) diff --git a/plugins/web/exa/plugin.yaml b/plugins/web/exa/plugin.yaml new file mode 100644 index 00000000000..1eceefb6ac5 --- /dev/null +++ b/plugins/web/exa/plugin.yaml @@ -0,0 +1,7 @@ +name: web-exa +version: 1.0.0 +description: "Exa web search and content extraction. Requires EXA_API_KEY — sign up at https://exa.ai." +author: NousResearch +kind: backend +provides_web_providers: + - exa diff --git a/plugins/web/exa/provider.py b/plugins/web/exa/provider.py new file mode 100644 index 00000000000..4daaa5f13dd --- /dev/null +++ b/plugins/web/exa/provider.py @@ -0,0 +1,208 @@ +"""Exa web search + content extraction — plugin form. + +Subclasses :class:`agent.web_search_provider.WebSearchProvider`. Uses the +official Exa SDK (``exa-py``) which is lazy-loaded via +:func:`tools.lazy_deps.ensure` so that cold-start CLI users don't pay the +SDK import cost when Exa isn't configured. + +Config keys this provider responds to:: + + web: + search_backend: "exa" # explicit per-capability + extract_backend: "exa" # explicit per-capability + backend: "exa" # shared fallback for both + +Env var:: + + EXA_API_KEY=... # https://exa.ai (paid tier; free trial available) + +The previous in-tree implementation lived at +``tools.web_tools._exa_search`` / ``_exa_extract``; this file is the +canonical replacement. Behavior is bit-for-bit identical aside from the +ABC method-name change. +""" + +from __future__ import annotations + +import logging +import os +from typing import Any, Dict, List + +from agent.web_search_provider import WebSearchProvider + +logger = logging.getLogger(__name__) + +# Module-level cache for the Exa client so we don't reconstruct it per +# call. Matches the legacy `_exa_client` pattern in tools/web_tools.py. 
+_exa_client: Any = None
+
+
+def _get_exa_client() -> Any:
+    """Return the cached Exa SDK client, building it on first use.
+
+    Mirrors :func:`tools.web_tools._get_exa_client`. Raises ``ImportError``
+    when the SDK cannot be loaded and ``ValueError`` when ``EXA_API_KEY``
+    is unset or blank.
+    """
+    global _exa_client
+
+    if _exa_client is not None:
+        return _exa_client
+
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+
+        _lazy_ensure("search.exa", prompt=False)
+    except ImportError:
+        pass
+    except Exception as exc:  # noqa: BLE001 — lazy_deps surfaces install hints
+        # Chain the cause so the underlying failure stays in the traceback.
+        raise ImportError(str(exc)) from exc
+
+    from exa_py import Exa  # noqa: WPS433 — deliberately lazy
+
+    # Strip so a blank or newline-padded key is judged the same way as in
+    # ExaWebSearchProvider.is_available().
+    api_key = os.getenv("EXA_API_KEY", "").strip()
+    if not api_key:
+        raise ValueError(
+            "EXA_API_KEY environment variable not set. "
+            "Get your API key at https://exa.ai"
+        )
+
+    client = Exa(api_key=api_key)
+    client.headers["x-exa-integration"] = "hermes-agent"
+    _exa_client = client  # cache only a fully configured client
+    return client
+
+
+def _reset_client_for_tests() -> None:
+    """Drop the cached Exa client so tests can re-instantiate cleanly."""
+    global _exa_client
+    _exa_client = None
+
+
+def _failed_extract(urls: List[str], error: str) -> List[Dict[str, Any]]:
+    """Build one uniform error entry per requested URL."""
+    return [{"url": u, "title": "", "content": "", "error": error} for u in urls]
+
+
+class ExaWebSearchProvider(WebSearchProvider):
+    """Exa search + extract provider.
+
+    Both methods are sync — Exa's SDK is sync-only. The web_extract_tool
+    dispatcher wraps sync extracts via ``asyncio.to_thread`` when it
+    needs to keep the event loop responsive.
+    """
+
+    @property
+    def name(self) -> str:
+        return "exa"
+
+    @property
+    def display_name(self) -> str:
+        return "Exa"
+
+    def is_available(self) -> bool:
+        """Return True when ``EXA_API_KEY`` is set to a non-empty value."""
+        return bool(os.getenv("EXA_API_KEY", "").strip())
+
+    def supports_search(self) -> bool:
+        return True
+
+    def supports_extract(self) -> bool:
+        return True
+
+    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
+        """Execute an Exa search.
+
+        Returns ``{"success": True, "data": {"web": [{...}, ...]}}`` on
+        success, ``{"success": False, "error": str}`` on failure (incl.
+        missing API key and SDK install errors).
+        """
+        try:
+            from tools.interrupt import is_interrupted
+
+            if is_interrupted():
+                return {"success": False, "error": "Interrupted"}
+
+            logger.info("Exa search: '%s' (limit=%d)", query, limit)
+            response = _get_exa_client().search(
+                query,
+                num_results=limit,
+                contents={"highlights": True},
+            )
+
+            web_results = [
+                {
+                    "url": result.url or "",
+                    "title": result.title or "",
+                    # Joining an empty list already yields "".
+                    "description": " ".join(result.highlights or []),
+                    "position": position,
+                }
+                for position, result in enumerate(response.results or [], 1)
+            ]
+            return {"success": True, "data": {"web": web_results}}
+        except ValueError as exc:
+            # Raised by _get_exa_client when EXA_API_KEY missing
+            return {"success": False, "error": str(exc)}
+        except ImportError as exc:
+            return {"success": False, "error": f"Exa SDK not installed: {exc}"}
+        except Exception as exc:  # noqa: BLE001 — surface as failure
+            logger.warning("Exa search error: %s", exc)
+            return {"success": False, "error": f"Exa search failed: {exc}"}
+
+    def extract(self, urls: List[str], **kwargs: Any) -> List[Dict[str, Any]]:
+        """Extract content from one or more URLs via Exa.
+
+        Returns one dict per result Exa sends back, shaped for the legacy
+        LLM post-processing pipeline. On whole-batch failure (interrupt,
+        missing key or SDK, API error) every requested URL gets a dict
+        with an ``error`` field instead of an exception being raised.
+        """
+        if not urls:
+            return []  # nothing requested; skip the client and the API call
+
+        try:
+            from tools.interrupt import is_interrupted
+
+            if is_interrupted():
+                return _failed_extract(urls, "Interrupted")
+
+            logger.info("Exa extract: %d URL(s)", len(urls))
+            response = _get_exa_client().get_contents(urls, text=True)
+
+            results: List[Dict[str, Any]] = []
+            for result in response.results or []:
+                content = result.text or ""
+                url = result.url or ""
+                title = result.title or ""
+                results.append(
+                    {
+                        "url": url,
+                        "title": title,
+                        "content": content,
+                        "raw_content": content,
+                        "metadata": {"sourceURL": url, "title": title},
+                    }
+                )
+            return results
+        except ValueError as exc:
+            return _failed_extract(urls, str(exc))
+        except ImportError as exc:
+            return _failed_extract(urls, f"Exa SDK not installed: {exc}")
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("Exa extract error: %s", exc)
+            return _failed_extract(urls, f"Exa extract failed: {exc}")
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        """Return the picker row: label, badge, tagline and required env var."""
+        return {
+            "name": "Exa",
+            "badge": "paid",
+            "tag": "Semantic + neural web search with content extraction.",
+            "env_vars": [
+                {"key": "EXA_API_KEY", "prompt": "Exa API key", "url": "https://exa.ai"},
+            ],
+        }