mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-15 09:21:36 +00:00
revert(web): remove keyless Parallel search fallback (#46350)
Remove the free Parallel Search MCP path and restore the keyed Parallel backend behavior from before it was introduced. Also drops the keyless fallback registration/display labeling tests and returns the Parallel SDK pin to the prior version.
This commit is contained in:
parent
a829e04d62
commit
f3fe99863d
16 changed files with 98 additions and 1398 deletions
|
|
@ -858,20 +858,6 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
|
|||
return False, ""
|
||||
|
||||
|
||||
def _used_free_parallel(result: str | None) -> bool:
|
||||
"""True when a web result came from Parallel's free Search MCP.
|
||||
|
||||
Only the keyless Parallel path tags its result with ``provider="parallel"``;
|
||||
the paid REST path and every other provider omit it. Used to label the tool
|
||||
line "Parallel search" / "Parallel fetch" exactly when the free MCP served
|
||||
the call.
|
||||
"""
|
||||
if not isinstance(result, str) or '"provider"' not in result:
|
||||
return False
|
||||
data = safe_json_loads(result)
|
||||
return isinstance(data, dict) and str(data.get("provider", "")).lower() == "parallel"
|
||||
|
||||
|
||||
def get_cute_tool_message(
|
||||
tool_name: str, args: dict, duration: float, result: str | None = None,
|
||||
) -> str:
|
||||
|
|
@ -909,17 +895,15 @@ def get_cute_tool_message(
|
|||
return f"{line}{failure_suffix}"
|
||||
|
||||
if tool_name == "web_search":
|
||||
verb = "Parallel search" if _used_free_parallel(result) else "search"
|
||||
return _wrap(f"┊ 🔍 {verb:<9} {_trunc(args.get('query', ''), 42)} {dur}")
|
||||
return _wrap(f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}")
|
||||
if tool_name == "web_extract":
|
||||
verb = "Parallel fetch" if _used_free_parallel(result) else "fetch"
|
||||
urls = args.get("urls", [])
|
||||
if urls:
|
||||
url = urls[0] if isinstance(urls, list) else str(urls)
|
||||
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
|
||||
extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
|
||||
return _wrap(f"┊ 📄 {verb:<9} {_trunc(domain, 35)}{extra} {dur}")
|
||||
return _wrap(f"┊ 📄 {verb:<9} pages {dur}")
|
||||
return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}")
|
||||
return _wrap(f"┊ 📄 fetch pages {dur}")
|
||||
if tool_name == "terminal":
|
||||
return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}")
|
||||
if tool_name == "process":
|
||||
|
|
|
|||
|
|
@ -2182,13 +2182,8 @@ def _toolset_needs_configuration_prompt(
|
|||
tts_cfg = config.get("tts", {})
|
||||
return not isinstance(tts_cfg, dict) or "provider" not in tts_cfg
|
||||
if ts_key == "web":
|
||||
# Web works out of the box via Parallel's free Search MCP (no key), so
|
||||
# don't force setup just because ``web.backend`` is unset — only prompt
|
||||
# when web isn't actually usable (e.g. an explicit backend configured
|
||||
# without its credentials). Lazy import: web_tools is heavy and most
|
||||
# tools_config callers don't need it.
|
||||
from tools.web_tools import check_web_api_key
|
||||
return not check_web_api_key()
|
||||
web_cfg = config.get("web", {})
|
||||
return not isinstance(web_cfg, dict) or "backend" not in web_cfg
|
||||
if ts_key == "browser":
|
||||
browser_cfg = config.get("browser", {})
|
||||
return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg
|
||||
|
|
|
|||
|
|
@ -1,20 +1,14 @@
|
|||
"""Parallel.ai web search + content extraction — plugin form.
|
||||
|
||||
Subclasses :class:`agent.web_search_provider.WebSearchProvider`.
|
||||
Subclasses :class:`agent.web_search_provider.WebSearchProvider`. Uses two
|
||||
distinct Parallel SDK clients:
|
||||
|
||||
Search runs on one of two transports, picked by credential:
|
||||
- ``Parallel`` (sync) — for :meth:`search`
|
||||
- ``AsyncParallel`` (async) — for :meth:`extract`
|
||||
|
||||
- **No key →** the free hosted Search MCP at ``https://search.parallel.ai/mcp``
|
||||
(anonymous Streamable-HTTP JSON-RPC). This makes ``web_search`` work out of
|
||||
the box with zero setup, which is why ``parallel`` is the keyless default
|
||||
backend in :func:`tools.web_tools._get_backend`.
|
||||
- **``PARALLEL_API_KEY`` →** the ``parallel`` SDK's v1 ``search`` / ``extract``
|
||||
REST endpoints (objective-tuned, mode-selectable, higher rate limits).
|
||||
|
||||
Extract mirrors search: keyed uses the async SDK (``AsyncParallel``) v1
|
||||
``extract``; keyless uses the free MCP's ``web_fetch``. :meth:`extract` is
|
||||
declared ``async def`` and the dispatcher in
|
||||
:func:`tools.web_tools.web_extract_tool` detects coroutines via
|
||||
This is the first plugin to exercise the **async-extract** code path in
|
||||
the ABC: :meth:`extract` is declared ``async def``, and the dispatcher
|
||||
in :func:`tools.web_tools.web_extract_tool` detects coroutines via
|
||||
:func:`inspect.iscoroutinefunction` and awaits.
|
||||
|
||||
Config keys this provider responds to::
|
||||
|
|
@ -23,66 +17,25 @@ Config keys this provider responds to::
|
|||
search_backend: "parallel" # explicit per-capability
|
||||
extract_backend: "parallel" # explicit per-capability
|
||||
backend: "parallel" # shared fallback
|
||||
# Optional: search mode (default "advanced"; also "basic")
|
||||
# via the PARALLEL_SEARCH_MODE env var. REST path only.
|
||||
# Optional: search mode (default "agentic"; also "fast" or "one-shot")
|
||||
# via the PARALLEL_SEARCH_MODE env var.
|
||||
|
||||
Env vars::
|
||||
|
||||
PARALLEL_API_KEY=... # https://parallel.ai (optional — unlocks
|
||||
# the v1 REST Search API; without it,
|
||||
# search and extract use the free MCP)
|
||||
PARALLEL_SEARCH_MODE=advanced # optional: basic|advanced (legacy
|
||||
# fast/one-shot map to basic, agentic to
|
||||
# advanced). REST path only.
|
||||
PARALLEL_API_KEY=... # https://parallel.ai (required)
|
||||
PARALLEL_SEARCH_MODE=agentic # optional: agentic|fast|one-shot
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import httpx
|
||||
|
||||
from agent.web_search_provider import WebSearchProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Free hosted Search MCP — anonymous-friendly, used when no PARALLEL_API_KEY is
|
||||
# configured. Docs: https://docs.parallel.ai/integrations/mcp/search-mcp
|
||||
_MCP_SEARCH_URL = "https://search.parallel.ai/mcp"
|
||||
_MCP_PROTOCOL_VERSION = "2025-06-18"
|
||||
# Deliberately generic client identity. Project policy (see the telemetry PR
|
||||
# policy in AGENTS.md) forbids third-party usage attribution without an
|
||||
# explicit user opt-in, so neither clientInfo nor the User-Agent names
|
||||
# hermes. MCP requires *a* clientInfo; a neutral one satisfies the spec
|
||||
# without attributing traffic.
|
||||
_MCP_CLIENT_NAME = "mcp-web-client"
|
||||
_MCP_CLIENT_VERSION = "1.0.0"
|
||||
_MCP_USER_AGENT = f"{_MCP_CLIENT_NAME}/{_MCP_CLIENT_VERSION}"
|
||||
_MCP_TIMEOUT_SECONDS = 30.0
|
||||
|
||||
# Free-tier attribution. The hosted Search MCP is free to use; surfacing this
|
||||
# on keyless results credits Parallel and matches the free-tier terms
|
||||
# (https://parallel.ai/customer-terms).
|
||||
_FREE_MCP_ATTRIBUTION = (
|
||||
"Search powered by the free Parallel Web Search MCP (https://parallel.ai)."
|
||||
)
|
||||
|
||||
|
||||
def _new_session_id() -> str:
    """Create a one-off Parallel ``session_id`` for a single tool call.

    The id is minted per call instead of once per process: a single process
    can serve many unrelated chats (gateway / batch runners), and sharing one
    id would lump all of their searches into the same Parallel session. The
    prefix is the generic client name, so it carries no hermes attribution
    (telemetry policy).
    """
    unique_suffix = uuid.uuid4().hex
    return "-".join((_MCP_CLIENT_NAME, unique_suffix))
|
||||
|
||||
# Module-level note: the canonical cache slots ``_parallel_client`` and
|
||||
# ``_async_parallel_client`` live on :mod:`tools.web_tools` so tests that do
|
||||
# ``tools.web_tools._parallel_client = None`` between cases see fresh state.
|
||||
|
|
@ -180,319 +133,11 @@ _get_async_parallel_client = _get_async_client
|
|||
|
||||
|
||||
def _resolve_search_mode() -> str:
    """Resolve ``PARALLEL_SEARCH_MODE`` to a valid v1 search mode.

    V1 has two modes where Beta had three. The v1 names are accepted as-is,
    and the legacy Beta names are still honoured for back-compat:

    - ``basic``, ``fast``, ``one-shot`` → ``"basic"`` (lower latency)
    - ``advanced``, ``agentic``, unset, or anything unrecognized →
      ``"advanced"`` (higher quality, the v1 default)

    The value is compared case-insensitively with surrounding whitespace
    ignored.
    """
    requested = os.getenv("PARALLEL_SEARCH_MODE", "advanced").lower().strip()
    low_latency_names = ("basic", "fast", "one-shot")
    return "basic" if requested in low_latency_names else "advanced"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Free Search MCP transport (keyless path)
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# A small hand-rolled Streamable-HTTP JSON-RPC client for the hosted Search
|
||||
# MCP, rather than the full MCP-client subsystem: we only call two tools
|
||||
# (``web_search`` / ``web_fetch``), so keeping it inline lets web_search and
|
||||
# web_extract stay ordinary tools with the MCP endpoint as just their wire
|
||||
# protocol.
|
||||
|
||||
|
||||
def _mcp_headers(
    session_id: str | None,
    api_key: str | None,
    protocol_version: str | None = None,
) -> Dict[str, str]:
    """Build the HTTP headers for one MCP request.

    The fixed headers (content type, accepted response formats, generic
    User-Agent) are always present. Three more are added only when their
    value is truthy:

    - ``Mcp-Session-Id`` — the session id, once one is known.
    - ``MCP-Protocol-Version`` — the Streamable-HTTP spec expects the
      negotiated version on every request after ``initialize``.
    - ``Authorization`` — a Bearer token, only when a key is actually held.
      The free endpoint is anonymous; an empty or garbage token would make it
      401 rather than serve the anonymous tier.
    """
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json, text/event-stream",
        "User-Agent": _MCP_USER_AGENT,
    }
    conditional = (
        ("Mcp-Session-Id", session_id),
        ("MCP-Protocol-Version", protocol_version),
        ("Authorization", f"Bearer {api_key}" if api_key else None),
    )
    headers.update({name: value for name, value in conditional if value})
    return headers
|
||||
|
||||
|
||||
def _iter_mcp_messages(text: str):
    """Yield JSON-RPC messages found in a plain-JSON or SSE response body.

    Two body formats are understood:

    - ``application/json``: the body starts with ``{`` or ``[`` and is parsed
      as one JSON document.
    - ``text/event-stream``: events are separated by blank lines, and the
      ``data:`` lines of one event are joined with newlines into a single
      JSON payload. Other SSE fields (``event:``, ``id:``, comments) are
      ignored.

    A JSON array is flattened so callers always receive individual messages
    (Streamable HTTP allows batching). Empty bodies, unparseable chunks and
    JSON ``null`` produce nothing.
    """
    def _messages_in(raw_json: str) -> list:
        # Decode one JSON document into a flat list of messages.
        try:
            decoded = json.loads(raw_json)
        except json.JSONDecodeError:
            return []
        if isinstance(decoded, list):
            return decoded
        return [] if decoded is None else [decoded]

    stripped = (text or "").strip()
    if not stripped:
        return

    # Plain JSON response: a single object or a batch array.
    if stripped[0] in "{[":
        yield from _messages_in(stripped)
        return

    # SSE response: accumulate data lines until the event boundary.
    pending: List[str] = []
    for raw_line in stripped.split("\n"):
        current = raw_line.rstrip("\r")
        if current.startswith("data:"):
            pending.append(current[len("data:"):].lstrip())
        elif not current.strip():  # blank line closes the current event
            if pending:
                yield from _messages_in("\n".join(pending))
            pending = []
    # The body was stripped, so the final event has no trailing blank line.
    if pending:
        yield from _messages_in("\n".join(pending))
|
||||
|
||||
|
||||
def _mcp_response_envelope(text: str, request_id: str) -> Dict[str, Any]:
    """Pick the JSON-RPC reply for *request_id* out of an MCP response body.

    Streamable-HTTP servers may send progress/log notifications ahead of the
    final result, so the whole stream is scanned. Only dict messages carrying
    a ``result`` or ``error`` key count as replies. The first reply whose
    ``id`` equals *request_id* is returned immediately; if none matches, the
    last reply seen is returned, and ``{}`` when the body holds no reply.
    """
    last_reply: Dict[str, Any] = {}
    for message in _iter_mcp_messages(text):
        is_reply = isinstance(message, dict) and (
            "result" in message or "error" in message
        )
        if is_reply:
            if message.get("id") == request_id:
                return message
            last_reply = message
    return last_reply
|
||||
|
||||
|
||||
def _mcp_payload(envelope: Dict[str, Any]) -> Dict[str, Any]:
    """Extract the tool result payload from a ``tools/call`` envelope.

    ``structuredContent`` is preferred when it is a dict (the authoritative
    machine-readable form). Otherwise the ``content`` text blocks are scanned
    and the first one that parses as a JSON *object* is returned. Text blocks
    that are empty, are not valid JSON, or decode to a non-object value
    (number, string, array, ``null``) are skipped, so the return value is
    always a dict that callers can safely ``.get()`` from.

    Raises:
        RuntimeError: on a JSON-RPC ``error``, a tool-level ``isError``, a
            ``result`` that is not an object, or when no usable payload is
            found.
    """
    if "error" in envelope:
        raise RuntimeError(f"Parallel MCP error: {str(envelope['error'])[:500]}")

    result = envelope.get("result") or {}
    if not isinstance(result, dict):
        # A malformed (non-object) result must surface as the documented
        # RuntimeError rather than an AttributeError on ``.get``.
        raise RuntimeError(
            f"Parallel MCP returned no parseable content: {str(result)[:500]}"
        )
    if result.get("isError"):
        raise RuntimeError(f"Parallel MCP tool error: {str(result)[:500]}")

    structured = result.get("structuredContent")
    if isinstance(structured, dict):
        return structured

    content = result.get("content")
    for item in content if isinstance(content, (list, tuple)) else ():
        if not isinstance(item, dict) or item.get("type") != "text":
            continue
        text = str(item.get("text") or "")
        if not text:
            continue
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            continue
        # Valid JSON that is not an object (e.g. "42" or "[1, 2]") is not a
        # tool payload; keep looking instead of returning a non-dict.
        if isinstance(parsed, dict):
            return parsed

    raise RuntimeError(
        f"Parallel MCP returned no parseable content: {str(result)[:500]}"
    )
|
||||
|
||||
|
||||
def _mcp_call(
    tool_name: str, arguments: Dict[str, Any], api_key: str | None
) -> Dict[str, Any]:
    """Perform the MCP handshake, then one ``tools/call``, and return its payload.

    Sequence: ``initialize`` (capturing ``Mcp-Session-Id``) →
    ``notifications/initialized`` → ``tools/call`` for *tool_name* with
    *arguments*. The parsed tool payload is produced by :func:`_mcp_payload`.
    A Bearer token is sent only when *api_key* is set.

    HTTP error statuses on the ``initialize`` and ``tools/call`` requests are
    raised via ``raise_for_status``; the response to the
    ``notifications/initialized`` ack is not inspected.
    """
    with httpx.Client(timeout=_MCP_TIMEOUT_SECONDS) as http:
        # Step 1: initialize, which is where the server may assign a session id.
        handshake_id = str(uuid.uuid4())
        handshake_body = {
            "jsonrpc": "2.0",
            "id": handshake_id,
            "method": "initialize",
            "params": {
                "protocolVersion": _MCP_PROTOCOL_VERSION,
                "capabilities": {},
                "clientInfo": {
                    "name": _MCP_CLIENT_NAME,
                    "version": _MCP_CLIENT_VERSION,
                },
            },
        }
        handshake = http.post(
            _MCP_SEARCH_URL,
            headers=_mcp_headers(None, api_key),
            json=handshake_body,
        )
        handshake.raise_for_status()

        # Echo only a session id the server really issued. Stateless
        # Streamable-HTTP servers may omit it, and sending an invented one on
        # later requests can get them rejected. When it is absent the header
        # is simply left out by _mcp_headers. This is unrelated to the
        # ``session_id`` tool argument, which is a client-minted grouping id.
        server_session = handshake.headers.get("mcp-session-id")

        # The negotiated protocol version goes on every post-init request
        # (servers may enforce it); fall back to the version we asked for.
        handshake_envelope = _mcp_response_envelope(handshake.text, handshake_id)
        handshake_result = handshake_envelope.get("result") or {}
        protocol = handshake_result.get("protocolVersion") or _MCP_PROTOCOL_VERSION

        # Step 2: notifications/initialized, the required handshake ack.
        http.post(
            _MCP_SEARCH_URL,
            headers=_mcp_headers(server_session, api_key, protocol),
            json={"jsonrpc": "2.0", "method": "notifications/initialized"},
        )

        # Step 3: the actual tool invocation.
        request_id = str(uuid.uuid4())
        request_body = {
            "jsonrpc": "2.0",
            "id": request_id,
            "method": "tools/call",
            "params": {"name": tool_name, "arguments": arguments},
        }
        reply = http.post(
            _MCP_SEARCH_URL,
            headers=_mcp_headers(server_session, api_key, protocol),
            json=request_body,
        )
        reply.raise_for_status()
        return _mcp_payload(_mcp_response_envelope(reply.text, request_id))
|
||||
|
||||
|
||||
def _mcp_web_search(query: str, limit: int, api_key: str | None) -> Dict[str, Any]:
    """Call the hosted Search MCP's ``web_search`` tool for *query*.

    The result uses the standard provider search shape
    (``{"success": True, "data": {"web": [...]}}``) plus the ``provider`` and
    ``attribution`` fields that mark the free tier. The MCP serves a fixed
    number of results, so *limit* is enforced client-side (at least one
    result is kept). The MCP requires ``objective`` (REST treats it as
    optional), so the query is sent as both the objective and the only search
    query.
    """
    tool_arguments = {
        "objective": query,
        "search_queries": [query],
        "session_id": _new_session_id(),
    }
    payload = _mcp_call("web_search", tool_arguments, api_key)

    top_hits = (payload.get("results") or [])[: max(limit, 1)]
    # ``position`` is the 1-based index within the trimmed list; entries that
    # are not dicts are dropped without renumbering the rest.
    web_results: List[Dict[str, Any]] = [
        {
            "url": hit.get("url") or "",
            "title": hit.get("title") or "",
            "description": " ".join(hit.get("excerpts") or []),
            "position": rank,
        }
        for rank, hit in enumerate(top_hits, start=1)
        if isinstance(hit, dict)
    ]

    # Credit the free tier. Only this anonymous path adds attribution; keyed
    # search goes through REST and carries none.
    return {
        "success": True,
        "data": {"web": web_results},
        "provider": "parallel",
        "attribution": _FREE_MCP_ATTRIBUTION,
    }
|
||||
|
||||
|
||||
def _mcp_web_fetch(urls: List[str], api_key: str | None) -> List[Dict[str, Any]]:
    """Call the hosted Search MCP's ``web_fetch`` tool for *urls*.

    Produces the per-URL extract shape expected by
    :func:`tools.web_tools.web_extract_tool`: exactly one row per input URL,
    in request order, duplicates included. ``full_content=True`` is requested
    so the page body is returned (matching the keyed SDK path); joined
    excerpts are the fallback when full content is absent. An input URL the
    MCP returned nothing for becomes a per-URL error row.
    """
    payload = _mcp_call(
        "web_fetch",
        {"urls": list(urls), "full_content": True, "session_id": _new_session_id()},
        api_key,
    )

    # First response entry wins for each URL.
    fetched: Dict[str, Dict[str, Any]] = {}
    for entry in payload.get("results") or []:
        if isinstance(entry, dict) and entry.get("url"):
            fetched.setdefault(entry["url"], entry)

    def _row_for(url: str) -> Dict[str, Any]:
        # Build the output row for a single requested URL.
        entry = fetched.get(url)
        if entry is None:
            return {
                "url": url,
                "title": "",
                "content": "",
                "error": "extraction failed (no content returned)",
                "metadata": {"sourceURL": url},
            }
        title = entry.get("title") or ""
        # Full page body first, joined excerpts second (same preference as
        # the keyed SDK extract path).
        body = entry.get("full_content") or "\n\n".join(entry.get("excerpts") or [])
        return {
            "url": url,
            "title": title,
            "content": body,
            "raw_content": body,
            "metadata": {"sourceURL": url, "title": title},
        }

    # Walking the *input* list keeps duplicates and positional alignment
    # with the request.
    return [_row_for(url) for url in urls]
|
||||
"""Return the validated PARALLEL_SEARCH_MODE value (default "agentic")."""
|
||||
mode = os.getenv("PARALLEL_SEARCH_MODE", "agentic").lower().strip()
|
||||
if mode not in {"fast", "one-shot", "agentic"}:
|
||||
mode = "agentic"
|
||||
return mode
|
||||
|
||||
|
||||
class ParallelWebSearchProvider(WebSearchProvider):
|
||||
|
|
@ -507,14 +152,7 @@ class ParallelWebSearchProvider(WebSearchProvider):
|
|||
return "Parallel"
|
||||
|
||||
def is_available(self) -> bool:
    """Return True when ``PARALLEL_API_KEY`` is set to a non-empty value.

    A value made up only of whitespace counts as unset.
    """
    configured_key = os.getenv("PARALLEL_API_KEY", "")
    return configured_key.strip() != ""
|
||||
|
||||
def supports_search(self) -> bool:
|
||||
|
|
@ -526,11 +164,9 @@ class ParallelWebSearchProvider(WebSearchProvider):
|
|||
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
||||
"""Execute a Parallel search (sync).
|
||||
|
||||
With ``PARALLEL_API_KEY`` set, uses the v1 ``search`` REST endpoint with
|
||||
the configured mode (``PARALLEL_SEARCH_MODE`` env var, default
|
||||
"advanced"; limit requested via advanced_settings.max_results, capped at
|
||||
20). Without a key, falls back to the free hosted Search MCP so search
|
||||
still works with zero setup.
|
||||
Uses the ``beta.search`` endpoint with the configured mode
|
||||
(``PARALLEL_SEARCH_MODE`` env var, default "agentic"). Limit is
|
||||
capped at 20 server-side.
|
||||
"""
|
||||
try:
|
||||
from tools.interrupt import is_interrupted
|
||||
|
|
@ -538,31 +174,19 @@ class ParallelWebSearchProvider(WebSearchProvider):
|
|||
if is_interrupted():
|
||||
return {"success": False, "error": "Interrupted"}
|
||||
|
||||
api_key = os.getenv("PARALLEL_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
logger.info(
|
||||
"Parallel search (free MCP): '%s' (limit=%d)", query, limit
|
||||
)
|
||||
return _mcp_web_search(query, limit, api_key=None)
|
||||
|
||||
mode = _resolve_search_mode()
|
||||
logger.info(
|
||||
"Parallel search (v1 REST): '%s' (mode=%s, limit=%d)",
|
||||
query, mode, limit,
|
||||
"Parallel search: '%s' (mode=%s, limit=%d)", query, mode, limit
|
||||
)
|
||||
# v1 Search API. Request the caller's limit via max_results (capped
|
||||
# at 20) so we don't rely on the API default — the slice below can
|
||||
# only trim, not ask for more.
|
||||
response = _get_sync_client().search(
|
||||
response = _get_sync_client().beta.search(
|
||||
search_queries=[query],
|
||||
objective=query,
|
||||
mode=mode,
|
||||
session_id=_new_session_id(),
|
||||
advanced_settings={"max_results": min(max(limit, 1), 20)},
|
||||
max_results=min(limit, 20),
|
||||
)
|
||||
|
||||
web_results = []
|
||||
for i, result in enumerate((response.results or [])[: max(limit, 1)]):
|
||||
for i, result in enumerate(response.results or []):
|
||||
excerpts = result.excerpts or []
|
||||
web_results.append(
|
||||
{
|
||||
|
|
@ -573,8 +197,6 @@ class ParallelWebSearchProvider(WebSearchProvider):
|
|||
}
|
||||
)
|
||||
|
||||
# Paid/REST path: no attribution and no "[Parallel]" label — the
|
||||
# branding is specifically for the free Search MCP tier.
|
||||
return {"success": True, "data": {"web": web_results}}
|
||||
except ValueError as exc:
|
||||
return {"success": False, "error": str(exc)}
|
||||
|
|
@ -590,12 +212,7 @@ class ParallelWebSearchProvider(WebSearchProvider):
|
|||
async def extract(
|
||||
self, urls: List[str], **kwargs: Any
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Extract content from one or more URLs.
|
||||
|
||||
With ``PARALLEL_API_KEY`` set, uses the async SDK's v1 ``extract`` for
|
||||
full page content. Without a key, falls back to the free hosted Search
|
||||
MCP's ``web_fetch`` tool so extraction works with zero setup, mirroring
|
||||
the keyless search path.
|
||||
"""Extract content from one or more URLs via the async SDK.
|
||||
|
||||
Returns the legacy list-of-results shape that
|
||||
:func:`tools.web_tools.web_extract_tool` expects: one entry per
|
||||
|
|
@ -610,21 +227,10 @@ class ParallelWebSearchProvider(WebSearchProvider):
|
|||
{"url": u, "error": "Interrupted", "title": ""} for u in urls
|
||||
]
|
||||
|
||||
api_key = os.getenv("PARALLEL_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
logger.info(
|
||||
"Parallel extract (free MCP web_fetch): %d URL(s)", len(urls)
|
||||
)
|
||||
# _mcp_web_fetch is sync httpx; run off the event loop.
|
||||
return await asyncio.to_thread(_mcp_web_fetch, list(urls), None)
|
||||
|
||||
logger.info("Parallel extract (v1 REST): %d URL(s)", len(urls))
|
||||
# v1 Extract API (client.extract, /v1/extract); full_content is set
|
||||
# via advanced_settings.
|
||||
response = await _get_async_client().extract(
|
||||
logger.info("Parallel extract: %d URL(s)", len(urls))
|
||||
response = await _get_async_client().beta.extract(
|
||||
urls=urls,
|
||||
advanced_settings={"full_content": True},
|
||||
session_id=_new_session_id(),
|
||||
full_content=True,
|
||||
)
|
||||
|
||||
results: List[Dict[str, Any]] = []
|
||||
|
|
@ -645,20 +251,13 @@ class ParallelWebSearchProvider(WebSearchProvider):
|
|||
)
|
||||
|
||||
for error in response.errors or []:
|
||||
err_url = getattr(error, "url", "") or ""
|
||||
err_msg = (
|
||||
getattr(error, "message", None)
|
||||
or getattr(error, "content", None)
|
||||
or getattr(error, "error_type", None)
|
||||
or "extraction failed"
|
||||
)
|
||||
results.append(
|
||||
{
|
||||
"url": err_url,
|
||||
"url": error.url or "",
|
||||
"title": "",
|
||||
"content": "",
|
||||
"error": err_msg,
|
||||
"metadata": {"sourceURL": err_url},
|
||||
"error": error.content or error.error_type or "extraction failed",
|
||||
"metadata": {"sourceURL": error.url or ""},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -680,16 +279,12 @@ class ParallelWebSearchProvider(WebSearchProvider):
|
|||
def get_setup_schema(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"name": "Parallel",
|
||||
"badge": "free",
|
||||
"tag": (
|
||||
"Free web search + extraction via Parallel's hosted Search MCP "
|
||||
"— no key needed. Add PARALLEL_API_KEY for the v1 REST Search "
|
||||
"API (richer modes, higher limits)."
|
||||
),
|
||||
"badge": "paid",
|
||||
"tag": "Objective-tuned search + parallel page extraction.",
|
||||
"env_vars": [
|
||||
{
|
||||
"key": "PARALLEL_API_KEY",
|
||||
"prompt": "Parallel API key (optional — unlocks the v1 REST Search API)",
|
||||
"prompt": "Parallel API key",
|
||||
"url": "https://parallel.ai",
|
||||
},
|
||||
],
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ anthropic = ["anthropic==0.87.0"] # CVE-2026-34450, CVE-2026-34452
|
|||
# search provider (configured via `hermes tools` or config.yaml).
|
||||
exa = ["exa-py==2.10.2"]
|
||||
firecrawl = ["firecrawl-py==4.17.0"]
|
||||
parallel-web = ["parallel-web==0.6.0"]
|
||||
parallel-web = ["parallel-web==0.4.2"]
|
||||
# Image generation backends
|
||||
fal = ["fal-client==0.13.1"]
|
||||
# Edge TTS — default TTS provider but still optional (users can pick
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ from agent.display import (
|
|||
set_tool_preview_max_len,
|
||||
_render_inline_unified_diff,
|
||||
_summarize_rendered_diff_sections,
|
||||
_used_free_parallel,
|
||||
render_edit_diff_with_delta,
|
||||
)
|
||||
|
||||
|
|
@ -172,46 +171,6 @@ class TestCuteToolMessagePreviewLength:
|
|||
assert "[error]" not in line
|
||||
|
||||
|
||||
class TestWebProviderLabel:
    """Verb labeling for the free path: "Parallel search" / "Parallel fetch"."""

    def test_free_search_verb_is_parallel(self):
        tagged = json.dumps({"success": True, "data": {"web": []}, "provider": "parallel"})
        rendered = get_cute_tool_message("web_search", {"query": "hello"}, 0.1, result=tagged)
        for fragment in ("Parallel search", "hello"):
            assert fragment in rendered

    def test_paid_search_verb_is_plain(self):
        untagged = json.dumps({"success": True, "data": {"web": [{"url": "u"}]}})
        rendered = get_cute_tool_message("web_search", {"query": "hi"}, 0.1, result=untagged)
        assert "Parallel" not in rendered
        assert "search" in rendered

    def test_missing_result_verb_is_plain(self):
        rendered = get_cute_tool_message("web_search", {"query": "hello"}, 0.1)
        assert "Parallel" not in rendered
        assert "search" in rendered

    def test_helper_is_parallel_free_specific(self):
        # Only a result tagged by Parallel's free MCP path is recognized.
        assert _used_free_parallel(json.dumps({"provider": "parallel"})) is True
        rejected = [
            json.dumps({"provider": "exa"}),
            json.dumps({"provider": "firecrawl"}),
            json.dumps({"success": True, "data": {}}),
            'not json',
            None,
        ]
        for candidate in rejected:
            assert _used_free_parallel(candidate) is False

    def test_free_extract_verb_is_parallel(self):
        tagged = json.dumps({"results": [{"url": "u", "content": "x"}], "provider": "parallel"})
        rendered = get_cute_tool_message("web_extract", {"urls": ["https://a.test"]}, 0.1, result=tagged)
        assert "Parallel fetch" in rendered

    def test_paid_extract_verb_is_plain(self):
        untagged = json.dumps({"results": [{"url": "u", "content": "x"}]})
        rendered = get_cute_tool_message("web_extract", {"urls": ["https://a.test"]}, 0.1, result=untagged)
        assert "Parallel" not in rendered
|
||||
|
||||
|
||||
class TestEditDiffPreview:
|
||||
def test_extract_edit_diff_for_patch(self):
|
||||
diff = extract_edit_diff("patch", '{"success": true, "diff": "--- a/x\\n+++ b/x\\n"}')
|
||||
|
|
|
|||
|
|
@ -975,19 +975,6 @@ def test_toolset_has_keys_treats_no_key_providers_as_configured():
|
|||
assert _toolset_has_keys("computer_use", config) is True
|
||||
|
||||
|
||||
def test_web_no_prompt_when_usable_keyless():
    """With web reported usable and an empty config, enabling the web toolset
    must not force provider setup (fresh install served by the free Parallel
    MCP)."""
    empty_config = {}
    with patch("tools.web_tools.check_web_api_key", return_value=True):
        needs_prompt = _toolset_needs_configuration_prompt("web", empty_config)
    assert needs_prompt is False
|
||||
|
||||
|
||||
def test_web_no_prompt_when_extract_backend_is_extract_capable():
    """A config naming only ``extract_backend`` must not trigger the setup
    prompt when web is reported usable."""
    extract_only_config = {"web": {"extract_backend": "parallel"}}
    with patch("tools.web_tools.check_web_api_key", return_value=True):
        needs_prompt = _toolset_needs_configuration_prompt("web", extract_only_config)
    assert needs_prompt is False
|
||||
|
||||
|
||||
def test_computer_use_needs_configuration_when_cua_driver_post_setup_pending():
|
||||
"""No-key providers can still need setup when their post_setup is unsatisfied.
|
||||
|
||||
|
|
|
|||
|
|
@ -1,383 +0,0 @@
|
|||
"""Keyless Parallel search via the free hosted Search MCP.
|
||||
|
||||
Covers the transport added in ``plugins/web/parallel/provider.py`` that lets
|
||||
``web_search`` work with no ``PARALLEL_API_KEY``:
|
||||
|
||||
- ``_mcp_headers`` — Bearer attached only when a key is held
|
||||
- ``_decode_mcp_envelope`` — plain-JSON and SSE (``data:``) response bodies
|
||||
- ``_mcp_payload`` — structuredContent preferred, text-block JSON fallback, errors
|
||||
- ``_mcp_web_search`` — full handshake (mocked transport) → standard search shape
|
||||
- ``ParallelWebSearchProvider.search`` — keyless path routes to the MCP
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
import plugins.web.parallel.provider as pp
|
||||
|
||||
|
||||
# ─── _mcp_headers ──────────────────────────────────────────────────────────
|
||||
|
||||
class TestMcpHeaders:
    """Header construction by ``pp._mcp_headers`` for anonymous and keyed calls."""

    def test_anonymous_has_no_authorization(self):
        headers = pp._mcp_headers(session_id=None, api_key=None)
        assert "Authorization" not in headers
        assert headers["Accept"] == "application/json, text/event-stream"
        assert "Mcp-Session-Id" not in headers

    def test_user_agent_is_generic_not_hermes(self):
        # Telemetry policy: no third-party usage attribution without opt-in.
        # The UA must be set (not python-httpx default) but must not name
        # hermes, on both the anonymous and keyed paths.
        anonymous_ua = pp._mcp_headers(session_id=None, api_key=None)["User-Agent"]
        keyed_ua = pp._mcp_headers(session_id="sid", api_key="pk-live")["User-Agent"]
        for user_agent in (anonymous_ua, keyed_ua):
            assert user_agent == f"{pp._MCP_CLIENT_NAME}/{pp._MCP_CLIENT_VERSION}"
            assert "hermes" not in user_agent.lower()

    def test_session_id_and_bearer_when_present(self):
        headers = pp._mcp_headers(session_id="sid-123", api_key="pk-live")
        assert headers["Mcp-Session-Id"] == "sid-123"
        assert headers["Authorization"] == "Bearer pk-live"
|
||||
|
||||
|
||||
# ─── SSE / JSON-RPC parsing ──────────────────────────────────────────────────
|
||||
|
||||
class TestMcpResponseParsing:
    """``pp._mcp_response_envelope`` over plain-JSON, SSE and batched bodies."""

    def test_plain_json_matched_by_id(self):
        raw = '{"jsonrpc":"2.0","id":"abc","result":{"ok":true}}'
        envelope = pp._mcp_response_envelope(raw, "abc")
        assert envelope["result"]["ok"] is True

    def test_sse_selects_response_for_request_id_skipping_notifications(self):
        # Three SSE events in order: an id-less progress notification, a
        # response carrying an unrelated id, and the response for our own
        # request id. Only the one matching our id may be picked.
        events = [
            'event: message\ndata: {"jsonrpc":"2.0","method":"notifications/progress","params":{"p":1}}\n\n',
            'event: message\ndata: {"jsonrpc":"2.0","id":"other","result":{"ok":false}}\n\n',
            'event: message\ndata: {"jsonrpc":"2.0","id":"req-1","result":{"ok":true}}\n\n',
        ]
        envelope = pp._mcp_response_envelope("".join(events), "req-1")
        assert envelope["result"]["ok"] is True

    def test_sse_multiline_data_concatenated(self):
        raw = 'data: {"jsonrpc":"2.0","id":"x",\ndata: "result":{"n":42}}\n\n'
        envelope = pp._mcp_response_envelope(raw, "x")
        assert envelope["result"]["n"] == 42

    def test_falls_back_to_last_result_when_id_absent(self):
        raw = '{"jsonrpc":"2.0","id":"server-chose","result":{"ok":true}}'
        # The request id doesn't match, but there is a single result → use it.
        envelope = pp._mcp_response_envelope(raw, "mismatch")
        assert envelope["result"]["ok"] is True

    def test_empty_body(self):
        for blank in ("", " "):
            assert pp._mcp_response_envelope(blank, "x") == {}

    def test_batched_json_array_flattened(self):
        # Streamable HTTP may batch messages into a JSON array.
        messages = [
            '{"jsonrpc":"2.0","method":"notifications/progress"}',
            '{"jsonrpc":"2.0","id":"req-9","result":{"ok":true}}',
        ]
        raw = "[" + ",".join(messages) + "]"
        envelope = pp._mcp_response_envelope(raw, "req-9")
        assert envelope["result"]["ok"] is True

    def test_batched_sse_data_array_flattened(self):
        raw = 'data: [{"jsonrpc":"2.0","id":"a","result":{"n":1}}]\n\n'
        envelope = pp._mcp_response_envelope(raw, "a")
        assert envelope["result"]["n"] == 1
|
||||
|
||||
|
||||
# ─── _mcp_payload ────────────────────────────────────────────────────────────
|
||||
|
||||
class TestMcpPayload:
    """Payload extraction and error surfacing in ``pp._mcp_payload``."""

    def test_prefers_structured_content(self):
        envelope = {
            "result": {
                "structuredContent": {"results": [{"url": "u"}]},
                "content": [{"type": "text", "text": "ignored"}],
            },
        }
        assert pp._mcp_payload(envelope) == {"results": [{"url": "u"}]}

    def test_parses_text_block_json(self):
        inner = {"search_id": "s1", "results": [{"url": "u", "title": "t"}]}
        text_block = {"type": "text", "text": json.dumps(inner)}
        envelope = {"result": {"content": [text_block]}}
        payload = pp._mcp_payload(envelope)
        assert payload["search_id"] == "s1"

    def test_raises_on_jsonrpc_error(self):
        failing = {"error": {"code": -32000, "message": "boom"}}
        with pytest.raises(RuntimeError, match="Parallel MCP error"):
            pp._mcp_payload(failing)

    def test_raises_on_tool_iserror(self):
        failing = {"result": {"isError": True, "content": []}}
        with pytest.raises(RuntimeError, match="Parallel MCP tool error"):
            pp._mcp_payload(failing)
|
||||
|
||||
|
||||
# ─── _mcp_web_search (mocked transport) ──────────────────────────────────────
|
||||
|
||||
class _FakeResponse:
|
||||
def __init__(self, *, text="", headers=None):
|
||||
self.text = text
|
||||
self.headers = headers or {}
|
||||
|
||||
def raise_for_status(self):
|
||||
return None
|
||||
|
||||
|
||||
class _FakeClient:
    """Stands in for httpx.Client: replays init → ack → tools/call."""

    def __init__(self, search_payload, init_session_id="server-sid"):
        self.calls = []
        self._init_session_id = init_session_id
        self._search_payload = search_payload

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        # Returning False lets any exception raised in the ``with`` body propagate.
        return False

    def post(self, url, headers=None, json=None):
        """Record the call and answer according to the JSON-RPC ``method``.

        The ``json`` parameter shadows the ``json`` module inside this method,
        so serialization goes through the module-level ``json_dumps`` helper.
        """
        self.calls.append({"headers": headers, "json": json})
        request = json or {}
        rpc_method = request.get("method")
        rpc_id = request.get("id")

        if rpc_method == "notifications/initialized":
            return _FakeResponse(text="")

        if rpc_method == "initialize":
            # Echo the request id, as the real server does.
            response_headers = {}
            if self._init_session_id is not None:
                response_headers["mcp-session-id"] = self._init_session_id
            init_body = {
                "jsonrpc": "2.0",
                "id": rpc_id,
                "result": {"protocolVersion": "2099-01-01"},
            }
            return _FakeResponse(text=json_dumps(init_body), headers=response_headers)

        # Anything else is treated as tools/call: wrap the canned payload in a
        # single text content block.
        text_block = {"type": "text", "text": json_dumps(self._search_payload)}
        call_body = {
            "jsonrpc": "2.0",
            "id": rpc_id,
            "result": {"content": [text_block]},
        }
        return _FakeResponse(text=json_dumps(call_body))
|
||||
|
||||
|
||||
def json_dumps(obj):
    """Serialize *obj* with ``json.dumps`` using default settings.

    Exists as a module-level alias so code whose local scope shadows the
    ``json`` name can still serialize.
    """
    serialized = json.dumps(obj)
    return serialized
|
||||
|
||||
|
||||
class TestMcpWebSearch:
    """End-to-end ``pp._mcp_web_search`` against a fake HTTP client."""

    def _payload(self, n):
        """Build a search payload with *n* results, two excerpts each."""
        results = []
        for i in range(n):
            results.append({
                "url": f"https://ex/{i}",
                "title": f"t{i}",
                "excerpts": [f"a{i}", f"b{i}"],
            })
        return {"search_id": "s", "results": results}

    @staticmethod
    def _run_search(client, query, limit, api_key):
        """Call ``_mcp_web_search`` with ``httpx.Client`` swapped for *client*."""
        with patch.object(pp.httpx, "Client", return_value=client):
            return pp._mcp_web_search(query, limit=limit, api_key=api_key)

    def test_returns_standard_shape_and_handshake(self):
        client = _FakeClient(self._payload(3))
        result = self._run_search(client, "hello", 5, None)

        assert result["success"] is True
        # Free-tier results credit Parallel.
        assert "Parallel" in result["attribution"]
        hits = result["data"]["web"]
        positions = [hit["position"] for hit in hits]
        assert positions == [1, 2, 3]
        first_hit = hits[0]
        assert first_hit["url"] == "https://ex/0"
        assert first_hit["description"] == "a0 b0"  # excerpts joined
        # handshake order
        sent_methods = [call["json"].get("method") for call in client.calls]
        assert sent_methods == ["initialize", "notifications/initialized", "tools/call"]
        # session id from the initialize response header is reused
        last_call = client.calls[-1]
        assert last_call["headers"]["Mcp-Session-Id"] == "server-sid"

    def test_stateless_server_no_session_header_not_invented(self):
        # A stateless Streamable-HTTP server may omit mcp-session-id on
        # initialize; we must NOT invent one (sending an unissued session id can
        # get follow-up requests rejected). The follow-ups carry no header.
        client = _FakeClient(self._payload(1), init_session_id=None)
        result = self._run_search(client, "hello", 5, None)
        assert result["success"] is True

        follow_ups = [
            call for call in client.calls
            if call["json"].get("method") != "initialize"
        ]
        assert follow_ups, "expected notifications/initialized + tools/call"
        for call in follow_ups:
            assert "Mcp-Session-Id" not in call["headers"]
        # anonymous → no Authorization on any call
        for call in client.calls:
            assert "Authorization" not in call["headers"]
        # tools/call mirrors query into objective + search_queries
        arguments = client.calls[-1]["json"]["params"]["arguments"]
        assert arguments["objective"] == "hello"
        assert arguments["search_queries"] == ["hello"]

    def test_limit_is_applied_client_side(self):
        client = _FakeClient(self._payload(10))
        result = self._run_search(client, "q", 2, None)
        assert len(result["data"]["web"]) == 2

    def test_bearer_attached_when_key_present(self):
        client = _FakeClient(self._payload(1))
        self._run_search(client, "q", 1, "pk-live")
        for call in client.calls:
            assert call["headers"]["Authorization"] == "Bearer pk-live"

    def test_negotiated_protocol_version_echoed_post_init(self):
        client = _FakeClient(self._payload(1))
        self._run_search(client, "q", 1, None)
        init_call, ack_call, tool_call = (
            client.calls[0], client.calls[1], client.calls[-1],
        )
        # initialize request doesn't carry the (not-yet-negotiated) version...
        assert "MCP-Protocol-Version" not in init_call["headers"]
        # ...but notifications/initialized and tools/call echo the negotiated one.
        assert ack_call["headers"]["MCP-Protocol-Version"] == "2099-01-01"
        assert tool_call["headers"]["MCP-Protocol-Version"] == "2099-01-01"
|
||||
|
||||
|
||||
# ─── provider.search keyless routing ─────────────────────────────────────────
|
||||
|
||||
class TestProviderKeylessSearch:
    """Routing of ``ParallelWebSearchProvider`` when no API key is set."""

    def test_search_without_key_uses_mcp(self, monkeypatch):
        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
        seen = {}

        def _stub_search(query, limit, api_key):
            seen["query"] = query
            seen["limit"] = limit
            seen["api_key"] = api_key
            return {"success": True, "data": {"web": []}}

        monkeypatch.setattr(pp, "_mcp_web_search", _stub_search)
        provider = pp.ParallelWebSearchProvider()
        result = provider.search("kittens", limit=4)
        assert result["success"] is True
        assert seen == {"query": "kittens", "limit": 4, "api_key": None}

    def test_is_available_reflects_key(self, monkeypatch):
        # is_available() gates the registry's active-provider walk + picker, so
        # it's key-based (keyless dispatch is handled by _get_backend, not this).
        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
        without_key = pp.ParallelWebSearchProvider().is_available()
        assert without_key is False

        monkeypatch.setenv("PARALLEL_API_KEY", "k")
        with_key = pp.ParallelWebSearchProvider().is_available()
        assert with_key is True
|
||||
|
||||
|
||||
# ─── web_fetch (keyless extract) ─────────────────────────────────────────────
|
||||
|
||||
class TestMcpWebFetch:
    """``pp._mcp_web_fetch`` result mapping plus keyless ``extract`` routing."""

    def _payload(self, urls):
        """Build an extract payload with one two-excerpt result per URL."""
        results = []
        for index, url in enumerate(urls):
            results.append({
                "url": url,
                "title": f"T{index}",
                "publish_date": None,
                "excerpts": [f"chunk-a-{index}", f"chunk-b-{index}"],
            })
        return {"extract_id": "e1", "results": results}

    @staticmethod
    def _run_fetch(client, urls):
        """Call ``_mcp_web_fetch`` anonymously with ``httpx.Client`` faked."""
        with patch.object(pp.httpx, "Client", return_value=client):
            return pp._mcp_web_fetch(urls, api_key=None)

    def test_maps_to_extract_shape(self):
        urls = ["https://a.test", "https://b.test"]
        client = _FakeClient(self._payload(urls))
        rows = self._run_fetch(client, urls)

        assert [row["url"] for row in rows] == urls
        first = rows[0]
        assert first["content"] == "chunk-a-0\n\nchunk-b-0"
        assert first["raw_content"] == first["content"]
        assert first["metadata"] == {"sourceURL": "https://a.test", "title": "T0"}
        # tools/call targeted web_fetch, requesting full page bodies.
        params = client.calls[-1]["json"]["params"]
        assert params["name"] == "web_fetch"
        arguments = params["arguments"]
        assert arguments["urls"] == urls
        assert arguments["full_content"] is True
        assert arguments["session_id"].startswith(f"{pp._MCP_CLIENT_NAME}-")

    def test_prefers_full_content_over_excerpts(self):
        only_result = {
            "url": "https://a.test",
            "title": "T",
            "excerpts": ["snippet"],
            "full_content": "the entire page body",
        }
        client = _FakeClient({"results": [only_result]})
        rows = self._run_fetch(client, ["https://a.test"])
        assert rows[0]["content"] == "the entire page body"

    def test_missing_url_becomes_error_entry(self):
        # Server returns only one of the two requested URLs.
        client = _FakeClient(self._payload(["https://a.test"]))
        rows = self._run_fetch(client, ["https://a.test", "https://missing.test"])
        assert len(rows) == 2
        unresolved = [row for row in rows if row["url"] == "https://missing.test"]
        missing = unresolved[0]
        assert "error" in missing
        assert missing["content"] == ""

    def test_preserves_order_and_duplicate_inputs(self):
        # MCP returns each unique URL once; output must still be one row per
        # input, in order, including the duplicate.
        client = _FakeClient(self._payload(["https://a.test", "https://b.test"]))
        urls = ["https://b.test", "https://a.test", "https://b.test"]
        rows = self._run_fetch(client, urls)
        assert [row["url"] for row in rows] == urls  # one row per input, in order
        for row in rows:  # all three resolved
            assert "error" not in row

    def test_extract_without_key_uses_web_fetch(self, monkeypatch):
        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
        seen = {}

        def _stub_fetch(urls, api_key):
            seen["urls"] = list(urls)
            seen["api_key"] = api_key
            row = {
                "url": urls[0],
                "title": "",
                "content": "x",
                "raw_content": "x",
                "metadata": {},
            }
            return [row]

        monkeypatch.setattr(pp, "_mcp_web_fetch", _stub_fetch)
        provider = pp.ParallelWebSearchProvider()
        rows = asyncio.run(provider.extract(["https://x.test"]))
        assert rows[0]["content"] == "x"
        assert seen == {"urls": ["https://x.test"], "api_key": None}
|
||||
|
||||
|
||||
# ─── keyed v1 REST search ────────────────────────────────────────────────────
|
||||
|
||||
class TestKeyedV1Search:
    """Keyed search path: arguments forwarded to the client and result shape."""

    def test_passes_max_results_and_omits_branding(self, monkeypatch):
        monkeypatch.setenv("PARALLEL_API_KEY", "pk-live")
        monkeypatch.delenv("PARALLEL_SEARCH_MODE", raising=False)
        forwarded = {}

        class _Res:
            def __init__(self, url):
                self.url = url
                self.title = "T"
                self.excerpts = ["x"]

        class _Resp:
            # Ten canned hits, more than the limit requested below.
            results = [_Res(f"https://r/{i}") for i in range(10)]

        class _Client:
            def search(self, **kw):
                forwarded.update(kw)
                return _Resp()

        def _make_client():
            return _Client()

        monkeypatch.setattr(pp, "_get_sync_client", _make_client)
        result = pp.ParallelWebSearchProvider().search("q", limit=7)

        assert result["success"] is True
        # honors the caller's limit via advanced_settings.max_results
        assert forwarded["advanced_settings"] == {"max_results": 7}
        assert forwarded["mode"] == "advanced"  # v1 default
        session_prefix = f"{pp._MCP_CLIENT_NAME}-"
        assert forwarded["session_id"].startswith(session_prefix)  # per-call id
        assert len(result["data"]["web"]) == 7  # client-side slice
        # paid path: no free-tier attribution, no [Parallel] label signal
        for absent_key in ("attribution", "provider"):
            assert absent_key not in result
|
||||
|
||||
|
||||
# ─── v1 search mode mapping ──────────────────────────────────────────────────
|
||||
|
||||
class TestResolveSearchMode:
    """Mapping of ``PARALLEL_SEARCH_MODE`` values by ``pp._resolve_search_mode``."""

    @pytest.mark.parametrize(
        "env,expected",
        [
            (None, "advanced"),        # default
            ("advanced", "advanced"),
            ("basic", "basic"),
            ("fast", "basic"),         # legacy → basic
            ("one-shot", "basic"),     # legacy → basic
            ("agentic", "advanced"),   # legacy → advanced
            ("garbage", "advanced"),   # invalid → default
            ("BASIC", "basic"),        # case-insensitive
        ],
    )
    def test_mode_mapping(self, monkeypatch, env, expected):
        # ``None`` stands for "variable unset"; any other value is exported as-is.
        if env is not None:
            monkeypatch.setenv("PARALLEL_SEARCH_MODE", env)
        else:
            monkeypatch.delenv("PARALLEL_SEARCH_MODE", raising=False)
        resolved = pp._resolve_search_mode()
        assert resolved == expected
|
||||
|
|
@ -193,16 +193,11 @@ class TestIsAvailable:
|
|||
assert p.is_available() is True
|
||||
|
||||
def test_parallel_requires_api_key(self, monkeypatch: pytest.MonkeyPatch) -> None:
    """is_available() is key-based — it gates the registry's active-provider
    walk/picker. (Keyless search/extract still work via the free MCP through
    _get_backend's terminal default, independent of this flag.)
    """
    _ensure_plugins_loaded()
    from agent.web_search_registry import get_provider

    provider = get_provider("parallel")
    assert provider is not None

    # No key in the environment → reported unavailable.
    monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
    available_without_key = provider.is_available()
    assert available_without_key is False

    # Any key value → reported available.
    monkeypatch.setenv("PARALLEL_API_KEY", "real")
    available_with_key = provider.is_available()
    assert available_with_key is True
|
||||
|
|
@ -427,33 +422,17 @@ class TestErrorResponseShapes:
|
|||
assert result.get("success") is False
|
||||
assert "error" in result
|
||||
|
||||
def test_parallel_extract_keyless_uses_mcp_web_fetch(
|
||||
self, monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
"""Without a key, extract routes to the free MCP web_fetch tool rather
|
||||
than erroring. The MCP transport is mocked so the test stays offline."""
|
||||
def test_parallel_extract_returns_per_url_errors_when_unconfigured(self) -> None:
|
||||
_ensure_plugins_loaded()
|
||||
from agent.web_search_registry import get_provider
|
||||
import plugins.web.parallel.provider as parallel_provider
|
||||
|
||||
monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
|
||||
captured = {}
|
||||
|
||||
def _fake_fetch(urls, api_key):
|
||||
captured["urls"] = list(urls)
|
||||
captured["api_key"] = api_key
|
||||
return [{"url": urls[0], "title": "Example", "content": "body",
|
||||
"raw_content": "body", "metadata": {"sourceURL": urls[0]}}]
|
||||
|
||||
monkeypatch.setattr(parallel_provider, "_mcp_web_fetch", _fake_fetch)
|
||||
|
||||
p = get_provider("parallel")
|
||||
assert p is not None
|
||||
result = asyncio.run(p.extract(["https://example.com"]))
|
||||
assert isinstance(result, list)
|
||||
assert len(result) == 1
|
||||
assert "error" in result[0]
|
||||
assert result[0]["url"] == "https://example.com"
|
||||
assert result[0]["content"] == "body"
|
||||
assert captured == {"urls": ["https://example.com"], "api_key": None}
|
||||
|
||||
def test_firecrawl_extract_returns_per_url_errors_when_unconfigured(self) -> None:
|
||||
_ensure_plugins_loaded()
|
||||
|
|
|
|||
|
|
@ -1,100 +0,0 @@
|
|||
"""Regression: the keyless Parallel web default must survive a failed sweep.
|
||||
|
||||
``web_search`` / ``web_extract`` are documented to work out of the box with
|
||||
zero setup via the bundled keyless Parallel free-MCP backend. That guarantee
|
||||
only holds if the bundled ``plugins/web/*`` providers are registered in
|
||||
``agent.web_search_registry``. The dispatch triggers the general plugin sweep
|
||||
(:func:`hermes_cli.plugins._ensure_plugins_discovered`) to do that — but the
|
||||
sweep can finish without registering them (its exception swallowed as a
|
||||
warning, a packaged layout where it ran before the bundled tree was
|
||||
importable, or a stale empty-discovery cache). When that happened, *both*
|
||||
tools dead-ended on "No web {search,extract} provider configured" even though
|
||||
no setup should be needed.
|
||||
|
||||
These tests pin the invariant that :func:`tools.web_tools._ensure_web_plugins_loaded`
|
||||
guarantees the keyless default is registered regardless of the sweep's outcome,
|
||||
and that the direct-registration fallback honors an explicit ``plugins.disabled``
|
||||
entry. Real imports from the bundled plugin modules — no provider mocking.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
import agent.web_search_registry as reg
|
||||
import hermes_cli.plugins as plugins
|
||||
from tools import web_tools
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clean_registry():
    """Reset the web-search provider registry around every test.

    Autouse: ``reg._reset_for_tests()`` runs before the test body and again
    after it, so no test in this module sees registrations left by another.
    """
    reg._reset_for_tests()
    yield
    reg._reset_for_tests()
|
||||
|
||||
|
||||
def _boom(*_a, **_k):
|
||||
raise RuntimeError("discovery boom")
|
||||
|
||||
|
||||
def test_keyless_default_registered_when_discovery_raises(monkeypatch):
    """A swallowed discovery failure must not strand the keyless default."""
    monkeypatch.setattr(plugins, "_ensure_plugins_discovered", _boom)
    # Precondition: the registry starts without a parallel provider.
    before = reg.get_provider("parallel")
    assert before is None

    web_tools._ensure_web_plugins_loaded()

    restored = reg.get_provider("parallel")
    assert restored is not None, "keyless Parallel default not restored"
    # It is the universal keyless default precisely because it does both.
    assert restored.supports_search()
    assert restored.supports_extract()
|
||||
|
||||
|
||||
def test_fallback_registers_full_bundled_set(monkeypatch):
    """The fix covers the whole bundled provider class, not just parallel."""
    monkeypatch.setattr(plugins, "_ensure_plugins_discovered", _boom)

    web_tools._ensure_web_plugins_loaded()

    registered = set()
    for provider in reg.list_providers():
        registered.add(provider.name)
    # Every bundled backend a user might have configured should be reachable
    # again, so an explicit ``web.extract_backend: firecrawl`` etc. resolves.
    bundled = ("parallel", "firecrawl", "tavily", "exa")
    for backend in bundled:
        assert backend in registered, f"{backend} missing after fallback"
|
||||
|
||||
|
||||
def test_fallback_honors_explicit_disable(monkeypatch):
    """A backend the user turned off via plugins.disabled stays off."""

    def _disabled_plugins():
        return {"web-parallel"}

    monkeypatch.setattr(plugins, "_get_disabled_plugins", _disabled_plugins)

    web_tools._register_bundled_web_providers_directly()

    registered = {provider.name for provider in reg.list_providers()}
    assert "parallel" not in registered, "explicit disable was ignored"
    # Other bundled backends are unaffected by the parallel disable.
    assert "tavily" in registered
|
||||
|
||||
|
||||
def test_fallback_is_noop_when_discovery_already_registered(monkeypatch):
    """Healthy path: don't pay for the direct sweep when parallel is present."""
    import importlib

    class _Ctx:
        """Plugin context double that forwards registrations to the registry."""

        def register_web_search_provider(self, provider):
            reg.register_provider(provider)

    # Pretend the general sweep already registered the keyless default.
    parallel_plugin = importlib.import_module("plugins.web.parallel")
    parallel_plugin.register(_Ctx())
    monkeypatch.setattr(plugins, "_ensure_plugins_discovered", lambda *a, **k: None)

    # Wrap the direct-registration fallback so each invocation is counted
    # while still delegating to the real implementation.
    invocations = []
    original_fallback = web_tools._register_bundled_web_providers_directly

    def _spy():
        invocations.append(1)
        original_fallback()

    monkeypatch.setattr(web_tools, "_register_bundled_web_providers_directly", _spy)
    web_tools._ensure_web_plugins_loaded()

    assert len(invocations) == 0, "direct-registration ran on the healthy path"
|
||||
|
|
@ -167,21 +167,6 @@ class TestPerCapabilityBackendSelection:
|
|||
monkeypatch.setenv("TAVILY_API_KEY", "test-key")
|
||||
assert web_tools._get_search_backend() == "tavily"
|
||||
|
||||
def test_explicit_extract_backend_honored_when_unavailable(self, monkeypatch):
    """An explicit per-capability backend is honored even with no creds, so
    its setup error surfaces instead of silently rerouting to the keyless
    Parallel default (which would send user URLs to a different provider)."""
    from tools import web_tools

    def _config():
        # A fresh dict on every call, like the lambda it replaces.
        return {
            "extract_backend": "firecrawl",
        }

    monkeypatch.setattr(web_tools, "_load_web_config", _config)
    firecrawl_env = ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "FIRECRAWL_GATEWAY_URL")
    for name in firecrawl_env:
        monkeypatch.delenv(name, raising=False)
    monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False, raising=False)

    # Resolves to firecrawl (not parallel) despite firecrawl being unavailable.
    chosen = web_tools._get_extract_backend()
    assert chosen == "firecrawl"
|
||||
|
||||
def test_falls_back_to_generic_backend_when_extract_backend_empty(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
|
|
@ -192,7 +177,7 @@ class TestPerCapabilityBackendSelection:
|
|||
monkeypatch.setenv("PARALLEL_API_KEY", "test-key")
|
||||
assert web_tools._get_extract_backend() == "parallel"
|
||||
|
||||
def test_explicit_search_backend_honored_when_unavailable(self, monkeypatch):
|
||||
def test_search_backend_ignored_when_not_available(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
|
||||
|
|
@ -201,10 +186,8 @@ class TestPerCapabilityBackendSelection:
|
|||
})
|
||||
monkeypatch.delenv("EXA_API_KEY", raising=False)
|
||||
monkeypatch.setenv("FIRECRAWL_API_KEY", "fc-key")
|
||||
# The explicit per-capability choice (exa) is honored even though it's
|
||||
# unavailable, so its setup error surfaces — we don't silently reroute
|
||||
# to the shared backend (or the keyless Parallel default).
|
||||
assert web_tools._get_search_backend() == "exa"
|
||||
# Should fall back to firecrawl since exa isn't configured
|
||||
assert web_tools._get_search_backend() == "firecrawl"
|
||||
|
||||
def test_fully_backward_compatible_with_web_backend_only(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
|
|
@ -308,55 +291,26 @@ class TestUnconfiguredErrorEnvelopeParity:
|
|||
):
|
||||
monkeypatch.delenv(k, raising=False)
|
||||
|
||||
def test_extract_empty_urls_does_not_raise(self, monkeypatch):
    """Regression: empty (or fully SSRF-blocked) URL sets skip the dispatch
    branch; the free-Parallel flag must still be initialized so the tool
    returns an error envelope instead of UnboundLocalError."""
    import asyncio
    from tools import web_tools

    self._clear_web_creds(monkeypatch)
    monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})

    raw = asyncio.run(web_tools.web_extract_tool([], "markdown"))

    # The key assertion is that it returns a normal error envelope (a
    # string) rather than raising UnboundLocalError.
    assert isinstance(raw, str)
    envelope = json.loads(raw)
    assert "error" in envelope
|
||||
|
||||
def test_unconfigured_search_falls_back_to_free_parallel(self, monkeypatch):
|
||||
"""``web_search_tool`` with no creds routes to Parallel's free Search
|
||||
MCP rather than erroring. The MCP transport is mocked so the test
|
||||
stays offline; we assert dispatch landed on parallel and returned the
|
||||
standard search envelope.
|
||||
def test_unconfigured_search_emits_top_level_error(self, monkeypatch):
|
||||
"""``web_search_tool`` with no creds returns ``{"error": "Error searching web: ..."}``
|
||||
— matching main's ``tool_error()`` envelope, not a per-result shape.
|
||||
"""
|
||||
from tools import web_tools
|
||||
import plugins.web.parallel.provider as parallel_provider
|
||||
|
||||
self._clear_web_creds(monkeypatch)
|
||||
# Reset firecrawl client cache so the unconfigured state is re-evaluated
|
||||
monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
|
||||
monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
|
||||
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: False)
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
|
||||
|
||||
captured = {}
|
||||
|
||||
def _fake_mcp(query, limit, api_key):
|
||||
captured["query"] = query
|
||||
captured["api_key"] = api_key
|
||||
return {
|
||||
"success": True,
|
||||
"data": {"web": [
|
||||
{"url": "https://example.com", "title": "Example",
|
||||
"description": "hit", "position": 1},
|
||||
]},
|
||||
}
|
||||
|
||||
monkeypatch.setattr(parallel_provider, "_mcp_web_search", _fake_mcp)
|
||||
|
||||
result = json.loads(web_tools.web_search_tool("hello world", limit=3))
|
||||
assert result.get("success") is True, f"expected success, got {result}"
|
||||
assert result["data"]["web"][0]["url"] == "https://example.com"
|
||||
# Keyless path: dispatched to parallel with no Bearer token.
|
||||
assert captured == {"query": "hello world", "api_key": None}
|
||||
assert "error" in result, f"expected top-level 'error' key, got {result}"
|
||||
# ``Error searching web:`` prefix comes from web_tools' top-level except handler
|
||||
assert "Error searching web:" in result["error"]
|
||||
assert "FIRECRAWL_API_KEY" in result["error"]
|
||||
# No per-result burying
|
||||
assert "results" not in result
|
||||
|
||||
|
||||
class TestDispatchersTriggerPluginDiscovery:
|
||||
|
|
|
|||
|
|
@ -190,11 +190,7 @@ class TestDDGSBackendWiring:
|
|||
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
|
||||
assert web_tools._get_backend() == "exa"
|
||||
|
||||
def test_auto_detect_prefers_keyless_parallel_over_ddgs(self, monkeypatch):
|
||||
# With no credentials, keyless Parallel is the auto-detect default even
|
||||
# when the ddgs package is installed — ddgs is search-only (can't
|
||||
# extract), so Parallel is preferred so both search and extract work.
|
||||
# ddgs remains reachable via an explicit web.backend=ddgs.
|
||||
def test_auto_detect_picks_ddgs_as_last_resort(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
|
||||
for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "PARALLEL_API_KEY",
|
||||
|
|
@ -202,7 +198,7 @@ class TestDDGSBackendWiring:
|
|||
monkeypatch.delenv(key, raising=False)
|
||||
monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
|
||||
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
|
||||
assert web_tools._get_backend() == "parallel"
|
||||
assert web_tools._get_backend() == "ddgs"
|
||||
|
||||
def test_check_web_api_key_true_when_ddgs_configured(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
|
|
|
|||
|
|
@ -313,9 +313,7 @@ class TestCheckWebApiKey:
|
|||
)
|
||||
assert web_tools.check_web_api_key() is True
|
||||
|
||||
def test_no_credentials_usable_via_free_parallel(self, monkeypatch):
|
||||
"""No credentials → check_web_api_key True: the keyless Parallel free MCP
|
||||
services calls, so web is usable out of the box."""
|
||||
def test_no_credentials_fails(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
|
||||
monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False)
|
||||
|
|
@ -327,7 +325,7 @@ class TestCheckWebApiKey:
|
|||
monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
|
||||
monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)
|
||||
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: False)
|
||||
assert web_tools.check_web_api_key() is True
|
||||
assert web_tools.check_web_api_key() is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -384,14 +384,12 @@ class TestBackendSelection:
|
|||
patch.dict(os.environ, {"FIRECRAWL_API_KEY": "fc-test"}):
|
||||
assert _get_backend() == "firecrawl"
|
||||
|
||||
def test_fallback_no_keys_defaults_to_parallel(self):
|
||||
"""No credentials, no config → 'parallel' (free Search MCP works
|
||||
keyless). Selection is purely credential-based."""
|
||||
def test_fallback_no_keys_defaults_to_firecrawl(self):
|
||||
"""No keys, no config → 'firecrawl' (will fail at client init)."""
|
||||
from tools.web_tools import _get_backend
|
||||
with patch("tools.web_tools._load_web_config", return_value={}), \
|
||||
patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
|
||||
patch("tools.web_tools._ddgs_package_importable", return_value=False):
|
||||
assert _get_backend() == "parallel"
|
||||
assert _get_backend() == "firecrawl"
|
||||
|
||||
def test_invalid_config_falls_through_to_fallback(self):
|
||||
"""web.backend=invalid → ignored, uses key-based fallback."""
|
||||
|
|
@ -626,73 +624,9 @@ class TestCheckWebApiKey:
|
|||
from tools.web_tools import check_web_api_key
|
||||
assert check_web_api_key() is True
|
||||
|
||||
def test_no_keys_usable_via_free_parallel(self):
|
||||
"""No credentials → check_web_api_key True: selection resolves to the
|
||||
keyless Parallel free MCP, which genuinely services calls (web works out
|
||||
of the box). check_web_api_key is a usability probe, not a key check."""
|
||||
def test_no_keys_returns_false(self):
|
||||
from tools.web_tools import check_web_api_key
|
||||
with patch("tools.web_tools._load_web_config", return_value={}), \
|
||||
patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
|
||||
patch("tools.web_tools._ddgs_package_importable", return_value=False), \
|
||||
patch.dict(os.environ, {}, clear=False):
|
||||
for k in ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
|
||||
"TAVILY_API_KEY", "EXA_API_KEY", "SEARXNG_URL", "BRAVE_SEARCH_API_KEY"):
|
||||
os.environ.pop(k, None)
|
||||
assert check_web_api_key() is True
|
||||
|
||||
def test_typo_extract_backend_not_masked_by_parallel(self):
|
||||
"""A typo'd per-capability backend is honored (so dispatch errors)
|
||||
rather than silently falling through to keyless Parallel."""
|
||||
from tools.web_tools import _get_extract_backend, check_web_api_key
|
||||
with patch("tools.web_tools._load_web_config",
|
||||
return_value={"extract_backend": "parrallel"}):
|
||||
assert _get_extract_backend() == "parrallel" # not "parallel"
|
||||
assert check_web_api_key() is False # unknown → unusable
|
||||
|
||||
def test_keyless_parallel_unusable_when_provider_disabled(self):
|
||||
"""If the bundled web-parallel provider is disabled/unregistered, the
|
||||
keyless free-MCP path must NOT report web as usable — otherwise setup is
|
||||
skipped but web tools fail at runtime with no provider."""
|
||||
from tools.web_tools import check_web_api_key
|
||||
with patch("tools.web_tools._load_web_config", return_value={}), \
|
||||
patch("tools.web_tools._parallel_provider_registered", return_value=False), \
|
||||
patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
|
||||
patch("tools.web_tools.check_firecrawl_api_key", return_value=False), \
|
||||
patch("tools.web_tools._ddgs_package_importable", return_value=False), \
|
||||
patch.dict(os.environ, {}, clear=False):
|
||||
for var in (
|
||||
"PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
|
||||
"TAVILY_API_KEY", "EXA_API_KEY", "BRAVE_SEARCH_API_KEY", "SEARXNG_URL",
|
||||
):
|
||||
os.environ.pop(var, None)
|
||||
assert check_web_api_key() is False
|
||||
|
||||
def test_extract_autodetect_skips_search_only_for_keyless_parallel(self):
|
||||
"""A search-only env credential (SEARXNG_URL) must not shadow the keyless
|
||||
Parallel free-MCP extract fallback: extract auto-detect skips search-only
|
||||
backends, so _get_extract_backend resolves to parallel (which can fetch),
|
||||
while search auto-detect still prefers the configured searxng."""
|
||||
from tools.web_tools import _get_extract_backend, _get_search_backend
|
||||
with patch("tools.web_tools._load_web_config", return_value={}), \
|
||||
patch.dict(os.environ, {}, clear=False):
|
||||
for var in (
|
||||
"PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
|
||||
"TAVILY_API_KEY", "EXA_API_KEY", "BRAVE_SEARCH_API_KEY",
|
||||
):
|
||||
os.environ.pop(var, None)
|
||||
os.environ["SEARXNG_URL"] = "http://localhost:8080"
|
||||
with patch("tools.web_tools._is_tool_gateway_ready", return_value=False):
|
||||
assert _get_search_backend() == "searxng"
|
||||
assert _get_extract_backend() == "parallel"
|
||||
|
||||
def test_configured_but_unavailable_backend_reports_unusable(self):
|
||||
"""An explicitly configured backend with no creds (exa, no key) →
|
||||
check_web_api_key False so diagnostics flag the misconfiguration —
|
||||
even though the tools stay registered."""
|
||||
from tools.web_tools import check_web_api_key
|
||||
with patch("tools.web_tools._load_web_config", return_value={"backend": "exa"}), \
|
||||
patch.dict(os.environ, {}, clear=False):
|
||||
os.environ.pop("EXA_API_KEY", None)
|
||||
with patch("tools.web_tools._ddgs_package_importable", return_value=False):
|
||||
assert check_web_api_key() is False
|
||||
|
||||
def test_both_keys_returns_true(self):
|
||||
|
|
@ -756,18 +690,12 @@ class TestCheckWebApiKey:
|
|||
|
||||
assert refresh_calls == []
|
||||
|
||||
def test_web_tools_registered_even_when_configured_backend_unavailable(self):
|
||||
# Registration is unconditional (web_tools_registered) so an explicitly
|
||||
# configured but unavailable backend (exa without EXA_API_KEY) keeps the
|
||||
# tools registered to surface exa's setup error at call time — while the
|
||||
# readiness probe (check_web_api_key) honestly reports not-configured.
|
||||
from tools.web_tools import web_tools_registered, check_web_api_key
|
||||
assert web_tools_registered() is True
|
||||
with patch("tools.web_tools._load_web_config", return_value={"backend": "exa"}), \
|
||||
patch.dict(os.environ, {}, clear=False):
|
||||
os.environ.pop("EXA_API_KEY", None)
|
||||
assert web_tools_registered() is True
|
||||
assert check_web_api_key() is False
|
||||
def test_configured_backend_must_match_available_provider(self):
|
||||
with patch("tools.web_tools._load_web_config", return_value={"backend": "parallel"}):
|
||||
with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"):
|
||||
with patch.dict(os.environ, {"FIRECRAWL_GATEWAY_URL": "http://127.0.0.1:3002"}, clear=False):
|
||||
from tools.web_tools import check_web_api_key
|
||||
assert check_web_api_key() is False
|
||||
|
||||
def test_configured_firecrawl_backend_accepts_managed_gateway(self):
|
||||
with patch("tools.web_tools._load_web_config", return_value={"backend": "firecrawl"}):
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
|
|||
# ─── Web search backends ───────────────────────────────────────────────
|
||||
"search.exa": ("exa-py==2.10.2",),
|
||||
"search.firecrawl": ("firecrawl-py==4.17.0",),
|
||||
"search.parallel": ("parallel-web==0.6.0",),
|
||||
"search.parallel": ("parallel-web==0.4.2",),
|
||||
|
||||
# ─── TTS providers ─────────────────────────────────────────────────────
|
||||
# Pinned to exact versions to match pyproject.toml's no-ranges policy
|
||||
|
|
|
|||
|
|
@ -141,35 +141,15 @@ def _load_web_config() -> dict:
|
|||
except (ImportError, Exception):
|
||||
return {}
|
||||
|
||||
# Recognized web backend names (config values accepted in ``web.backend`` /
|
||||
# ``web.search_backend`` / ``web.extract_backend``). Kept as a single source of
|
||||
# truth for config validation across the selection helpers.
|
||||
_KNOWN_WEB_BACKENDS = frozenset(
|
||||
{"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}
|
||||
)
|
||||
|
||||
# Backends that only service web_search (their provider's ``supports_extract()``
|
||||
# is False). They are skipped during *extract* auto-detect so a search-only
|
||||
# credential (e.g. SEARXNG_URL) does not shadow the keyless Parallel free-MCP
|
||||
# fallback, which would otherwise leave web_extract broken on a no-key install.
|
||||
_SEARCH_ONLY_BACKENDS = frozenset({"searxng", "brave-free", "ddgs", "xai"})
|
||||
|
||||
|
||||
def _get_backend(capability: str = "search") -> str:
|
||||
def _get_backend() -> str:
|
||||
"""Determine which web backend to use (shared fallback).
|
||||
|
||||
Reads ``web.backend`` from config.yaml (set by ``hermes tools``).
|
||||
Falls back to whichever API key is present for users who configured
|
||||
keys manually without running setup.
|
||||
|
||||
``capability`` ("search" | "extract") only affects auto-detect: for
|
||||
``extract`` we skip search-only backends (``_SEARCH_ONLY_BACKENDS``) so a
|
||||
search-only credential never shadows the keyless Parallel free-MCP extract
|
||||
fallback. An explicit ``web.backend`` value is honored as-is (explicit wins,
|
||||
surfacing that backend's own search-only error rather than rerouting).
|
||||
"""
|
||||
configured = (_load_web_config().get("backend") or "").lower().strip()
|
||||
if configured in _KNOWN_WEB_BACKENDS:
|
||||
if configured in {"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}:
|
||||
return configured
|
||||
|
||||
# Fallback for manual / legacy config — pick the highest-priority
|
||||
|
|
@ -178,8 +158,7 @@ def _get_backend(capability: str = "search") -> str:
|
|||
# pre-empted by a Nous OAuth token whose subscription tier may not
|
||||
# actually grant web-search access (the gateway then fails at runtime
|
||||
# with "no subscription" and the tool returns an error to the agent
|
||||
# without falling back). Free-tier backends (searxng / brave-free /
|
||||
# keyless parallel / ddgs) trail the keyed ones.
|
||||
# without falling back). Free-tier backends trail the paid ones.
|
||||
backend_candidates = (
|
||||
("tavily", _has_env("TAVILY_API_KEY")),
|
||||
("exa", _has_env("EXA_API_KEY")),
|
||||
|
|
@ -188,24 +167,13 @@ def _get_backend(capability: str = "search") -> str:
|
|||
("firecrawl", _is_tool_gateway_ready()),
|
||||
("searxng", _has_env("SEARXNG_URL")),
|
||||
("brave-free", _has_env("BRAVE_SEARCH_API_KEY")),
|
||||
# Keyless Parallel free MCP — always available, the intended no-key
|
||||
# default for both search and extract. Ahead of ddgs (search-only, so it
|
||||
# can't service web_extract); ddgs stays reachable via web.backend=ddgs.
|
||||
("parallel", True),
|
||||
("ddgs", _ddgs_package_importable()),
|
||||
)
|
||||
for backend, available in backend_candidates:
|
||||
if not available:
|
||||
continue
|
||||
# For extract, skip search-only backends so the keyless Parallel
|
||||
# free-MCP fallback (which can fetch URLs) is reached instead.
|
||||
if capability == "extract" and backend in _SEARCH_ONLY_BACKENDS:
|
||||
continue
|
||||
return backend
|
||||
if available:
|
||||
return backend
|
||||
|
||||
# Defensive terminal (the keyless ``parallel`` candidate above is always
|
||||
# available, so this is effectively unreachable).
|
||||
return "parallel"
|
||||
return "firecrawl" # default (backward compat)
|
||||
|
||||
|
||||
def _get_search_backend() -> str:
|
||||
|
|
@ -236,19 +204,14 @@ def _get_extract_backend() -> str:
|
|||
def _get_capability_backend(capability: str) -> str:
|
||||
"""Shared helper for per-capability backend selection.
|
||||
|
||||
Reads ``web.{capability}_backend`` from config. Any explicit value is
|
||||
honored **regardless of availability** — including unrecognized typos like
|
||||
``parrallel`` — so the dispatcher surfaces that backend's own setup/config
|
||||
error rather than silently rerouting to the keyless Parallel default (which
|
||||
would send user queries to a different provider and hide the
|
||||
misconfiguration). This matches ``web_search_registry``'s "explicit config
|
||||
wins" rule. Only an *unset* value falls through to ``_get_backend()``.
|
||||
Reads ``web.{capability}_backend`` from config; if set and available,
|
||||
uses it. Otherwise falls through to the shared ``_get_backend()``.
|
||||
"""
|
||||
cfg = _load_web_config()
|
||||
specific = (cfg.get(f"{capability}_backend") or "").lower().strip()
|
||||
if specific:
|
||||
if specific and _is_backend_available(specific):
|
||||
return specific
|
||||
return _get_backend(capability)
|
||||
return _get_backend()
|
||||
|
||||
|
||||
def _is_backend_available(backend: str) -> bool:
|
||||
|
|
@ -256,8 +219,6 @@ def _is_backend_available(backend: str) -> bool:
|
|||
if backend == "exa":
|
||||
return _has_env("EXA_API_KEY")
|
||||
if backend == "parallel":
|
||||
# Credential probe: True only with a real key. The keyless free-MCP
|
||||
# fallback is handled by _get_backend()'s terminal default, not here.
|
||||
return _has_env("PARALLEL_API_KEY")
|
||||
if backend == "firecrawl":
|
||||
return check_firecrawl_api_key()
|
||||
|
|
@ -810,17 +771,6 @@ def _ensure_web_plugins_loaded() -> None:
|
|||
Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` exactly:
|
||||
the underlying discovery call is idempotent and cheap on subsequent
|
||||
invocations.
|
||||
|
||||
Triggering discovery is necessary but not *sufficient*: the sweep can
|
||||
finish without registering the bundled web providers (its exception
|
||||
swallowed below as a warning, a packaged layout where discovery ran before
|
||||
the bundled tree was importable, or a stale empty-discovery cache). When
|
||||
that happens the registry is empty and *both* web_search and web_extract
|
||||
dead-end on "No web {search,extract} provider configured" — even though the
|
||||
keyless Parallel default is supposed to work with zero setup. So after
|
||||
discovery we verify the keyless default landed and, if not, register the
|
||||
bundled providers directly (see
|
||||
:func:`_register_bundled_web_providers_directly`).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.plugins import _ensure_plugins_discovered
|
||||
|
|
@ -833,87 +783,6 @@ def _ensure_web_plugins_loaded() -> None:
|
|||
# clue in normal logs about the real cause.
|
||||
logger.warning("Web plugin discovery failed (non-fatal): %s", exc)
|
||||
|
||||
# Belt-and-suspenders: guarantee the keyless Parallel default (the
|
||||
# documented zero-setup backend for both web_search and web_extract) is
|
||||
# actually registered. The lookup is a cheap dict hit on the healthy path
|
||||
# (discovery already registered it → no-op); only an empty registry pays
|
||||
# for the direct-registration sweep.
|
||||
try:
|
||||
from agent.web_search_registry import get_provider
|
||||
|
||||
if get_provider("parallel") is None:
|
||||
_register_bundled_web_providers_directly()
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.debug("Bundled web provider fallback check failed: %s", exc)
|
||||
|
||||
|
||||
def _register_bundled_web_providers_directly() -> None:
|
||||
"""Register the repo's bundled web providers without the plugin manager.
|
||||
|
||||
The normal path is the general plugin sweep
|
||||
(:func:`hermes_cli.plugins._ensure_plugins_discovered`), which auto-loads
|
||||
every ``plugins/web/<name>`` backend (they are ``kind: backend``). This
|
||||
fallback exists for the runtimes where that sweep does not leave the web
|
||||
registry populated — so the keyless Parallel default (and any bundled
|
||||
backend the user explicitly configured) keeps working instead of
|
||||
surfacing a misleading "No web provider configured" error.
|
||||
|
||||
Imports each bundled ``plugins/web/<name>`` package and calls its
|
||||
``register()`` directly against :mod:`agent.web_search_registry`. Idempotent
|
||||
(re-register overwrites) and honors an explicit ``plugins.disabled`` entry
|
||||
so a backend the user turned off stays off.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.plugins import (
|
||||
_get_disabled_plugins,
|
||||
get_bundled_plugins_dir,
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.debug("Bundled web provider fallback unavailable: %s", exc)
|
||||
return
|
||||
|
||||
web_dir = get_bundled_plugins_dir() / "web"
|
||||
if not web_dir.is_dir():
|
||||
return
|
||||
|
||||
disabled = _get_disabled_plugins()
|
||||
|
||||
from agent.web_search_provider import WebSearchProvider
|
||||
from agent.web_search_registry import register_provider
|
||||
|
||||
class _DirectRegistrationCtx:
|
||||
"""Minimal plugin ctx exposing only web-provider registration."""
|
||||
|
||||
def register_web_search_provider(self, provider) -> None:
|
||||
if isinstance(provider, WebSearchProvider):
|
||||
register_provider(provider)
|
||||
|
||||
ctx = _DirectRegistrationCtx()
|
||||
import importlib
|
||||
|
||||
for child in sorted(web_dir.iterdir()):
|
||||
if not child.is_dir():
|
||||
continue
|
||||
if not (child / "plugin.yaml").exists() and not (child / "plugin.yml").exists():
|
||||
continue
|
||||
# Respect an explicit disable — match discover_and_load's key/name
|
||||
# check (key ``web/<dir>``; manifest name ``web-<dir-with-dashes>``).
|
||||
if (
|
||||
f"web/{child.name}" in disabled
|
||||
or f"web-{child.name.replace('_', '-')}" in disabled
|
||||
):
|
||||
continue
|
||||
try:
|
||||
module = importlib.import_module(f"plugins.web.{child.name}")
|
||||
register_fn = getattr(module, "register", None)
|
||||
if callable(register_fn):
|
||||
register_fn(ctx)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.debug(
|
||||
"Direct registration of bundled web provider '%s' failed: %s",
|
||||
child.name, exc,
|
||||
)
|
||||
|
||||
|
||||
def web_search_tool(query: str, limit: int = 5) -> str:
|
||||
"""
|
||||
|
|
@ -1103,19 +972,11 @@ async def web_extract_tool(
|
|||
else:
|
||||
safe_urls.append(url)
|
||||
|
||||
# Tracks the free-tier Parallel extract path (no key → web_fetch via the
|
||||
# hosted Search MCP) so we can credit Parallel in the output/UI. Bound
|
||||
# here so empty/all-blocked inputs (which skip dispatch) stay defined.
|
||||
_free_parallel_extract = False
|
||||
|
||||
# Dispatch only safe URLs to the configured backend
|
||||
if not safe_urls:
|
||||
results = []
|
||||
else:
|
||||
backend = _get_extract_backend()
|
||||
_free_parallel_extract = (
|
||||
backend == "parallel" and not _has_env("PARALLEL_API_KEY")
|
||||
)
|
||||
|
||||
# All seven providers (brave-free, ddgs, searxng, exa, parallel,
|
||||
# tavily, firecrawl) now live as plugins. The dispatcher is a
|
||||
|
|
@ -1289,14 +1150,6 @@ async def web_extract_tool(
|
|||
for r in response.get("results", [])
|
||||
]
|
||||
trimmed_response = {"results": trimmed_results}
|
||||
if _free_parallel_extract:
|
||||
# Credit Parallel's free Search MCP (drives the "[Parallel]" UI tag
|
||||
# + lets the model cite the source). Free tier only.
|
||||
trimmed_response["provider"] = "parallel"
|
||||
trimmed_response["attribution"] = (
|
||||
"Extraction powered by the free Parallel Web Search MCP "
|
||||
"(https://parallel.ai)."
|
||||
)
|
||||
|
||||
if trimmed_response.get("results") == []:
|
||||
result_json = tool_error("Content was inaccessible or not found")
|
||||
|
|
@ -1328,61 +1181,16 @@ async def web_extract_tool(
|
|||
return tool_error(error_msg)
|
||||
|
||||
|
||||
def web_tools_registered() -> bool:
|
||||
"""Whether the web tools should be registered. Always True.
|
||||
|
||||
Registration is decoupled from credential readiness: with no credentials,
|
||||
search/extract fall back to Parallel's free hosted Search MCP, and an
|
||||
explicitly configured-but-unavailable backend must stay registered so
|
||||
dispatch surfaces that backend's own setup error rather than the tool
|
||||
silently vanishing. For "is web actually configured?" use
|
||||
:func:`check_web_api_key`.
|
||||
"""
|
||||
return True
|
||||
|
||||
|
||||
def _parallel_provider_registered() -> bool:
|
||||
"""True when the bundled ``web-parallel`` provider is registered/enabled.
|
||||
|
||||
Plugin discovery skips disabled plugins, so a disabled (``plugins.disabled``)
|
||||
or otherwise-unregistered parallel provider yields ``None`` here.
|
||||
"""
|
||||
_ensure_web_plugins_loaded()
|
||||
try:
|
||||
from agent.web_search_registry import get_provider
|
||||
|
||||
return get_provider("parallel") is not None
|
||||
except Exception: # noqa: BLE001
|
||||
return False
|
||||
|
||||
|
||||
def _backend_usable(backend: str) -> bool:
|
||||
"""True when *backend* can service calls. Keyless Parallel counts (free MCP).
|
||||
|
||||
Unknown/typo'd backend names are not usable (so an explicit typo is reported
|
||||
as a config problem rather than masked by the keyless fallback).
|
||||
"""
|
||||
if backend == "parallel" and not _has_env("PARALLEL_API_KEY"):
|
||||
# Keyless Parallel is only genuinely usable when its provider is actually
|
||||
# registered/enabled. If web-parallel is disabled or discovery failed,
|
||||
# report unusable so setup is not skipped and the user is not left with
|
||||
# web tools that fail at runtime ("No web search provider configured").
|
||||
return _parallel_provider_registered()
|
||||
return _is_backend_available(backend)
|
||||
|
||||
|
||||
# Convenience function to check Firecrawl credentials
|
||||
def check_web_api_key() -> bool:
|
||||
"""Usability probe: True when the selected web backends can service calls.
|
||||
|
||||
Probes the backends that :func:`_get_search_backend` /
|
||||
:func:`_get_extract_backend` actually select (not just shared
|
||||
``web.backend``), so an explicit per-capability backend with missing
|
||||
credentials — or a typo'd name — reports unusable instead of being masked by
|
||||
the keyless Parallel fallback. Keyless Parallel itself genuinely services
|
||||
calls, so a zero-setup install reports usable. Distinct from
|
||||
:func:`web_tools_registered` (always True — whether the tool is offered).
|
||||
"""
|
||||
return _backend_usable(_get_search_backend()) and _backend_usable(_get_extract_backend())
|
||||
"""Check whether the configured web backend is available."""
|
||||
configured = _load_web_config().get("backend", "").lower().strip()
|
||||
if configured in {"exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai"}:
|
||||
return _is_backend_available(configured)
|
||||
return any(
|
||||
_is_backend_available(backend)
|
||||
for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai")
|
||||
)
|
||||
|
||||
|
||||
def check_auxiliary_model() -> bool:
|
||||
|
|
@ -1550,7 +1358,7 @@ registry.register(
|
|||
toolset="web",
|
||||
schema=WEB_SEARCH_SCHEMA,
|
||||
handler=lambda args, **kw: web_search_tool(args.get("query", ""), limit=args.get("limit", 5)),
|
||||
check_fn=web_tools_registered,
|
||||
check_fn=check_web_api_key,
|
||||
requires_env=_web_requires_env(),
|
||||
emoji="🔍",
|
||||
max_result_size_chars=100_000,
|
||||
|
|
@ -1561,7 +1369,7 @@ registry.register(
|
|||
schema=WEB_EXTRACT_SCHEMA,
|
||||
handler=lambda args, **kw: web_extract_tool(
|
||||
args.get("urls", [])[:5] if isinstance(args.get("urls"), list) else [], "markdown"),
|
||||
check_fn=web_tools_registered,
|
||||
check_fn=check_web_api_key,
|
||||
requires_env=_web_requires_env(),
|
||||
is_async=True,
|
||||
emoji="📄",
|
||||
|
|
|
|||
8
uv.lock
generated
8
uv.lock
generated
|
|
@ -1654,7 +1654,7 @@ requires-dist = [
|
|||
{ name = "numpy", marker = "extra == 'voice'", specifier = "==2.4.3" },
|
||||
{ name = "openai", specifier = "==2.24.0" },
|
||||
{ name = "packaging", specifier = "==26.0" },
|
||||
{ name = "parallel-web", marker = "extra == 'parallel-web'", specifier = "==0.6.0" },
|
||||
{ name = "parallel-web", marker = "extra == 'parallel-web'", specifier = "==0.4.2" },
|
||||
{ name = "pathspec", specifier = "==1.1.1" },
|
||||
{ name = "pillow", specifier = "==12.2.0" },
|
||||
{ name = "prompt-toolkit", specifier = "==3.0.52" },
|
||||
|
|
@ -2690,7 +2690,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "parallel-web"
|
||||
version = "0.6.0"
|
||||
version = "0.4.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
|
|
@ -2700,9 +2700,9 @@ dependencies = [
|
|||
{ name = "sniffio" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/7f/81/101c961fe6665212df01fb39a70ebb379dc33529c7bc9210675c0f525139/parallel_web-0.6.0.tar.gz", hash = "sha256:f8aecd3f1958090090c4516881cefea4f55c40948ba3bb99217ca9a6d4263225", size = 173149, upload-time = "2026-05-06T19:13:09.782Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/24/50/fb9b28a679e01682006b5259abff96de3d16e114e9447a7793fec31715de/parallel_web-0.4.2.tar.gz", hash = "sha256:599b5a8f387dc35c7dc8c81e372eadf6958a40acacea58bf170dfc663c003da7", size = 140026, upload-time = "2026-03-09T22:24:35.448Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a2/7c/7e8b63a0e90efaf567a818fca86c6ad3a85711f8995d2657b51b0cae2351/parallel_web-0.6.0-py3-none-any.whl", hash = "sha256:dc5342ef7262bd2e9f85eb7eace32833bd3d7e3af0bf5fbd780d1ea8c8d9ceb0", size = 199217, upload-time = "2026-05-06T19:13:08.316Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a0/3e/2218fa29637781b8e7ac35a928108ff2614ddd40879389d3af2caa725af5/parallel_web-0.4.2-py3-none-any.whl", hash = "sha256:aa3a4a9aecc08972c5ce9303271d4917903373dff4dd277d9a3e30f9cff53346", size = 144012, upload-time = "2026-03-09T22:24:33.979Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue