revert(web): remove keyless Parallel search fallback (#46350)

Remove the free Parallel Search MCP path and restore the keyed Parallel backend behavior from before it was introduced.

Also drops the keyless fallback registration/display labeling tests and returns the Parallel SDK pin to the prior version.
This commit is contained in:
Teknium 2026-06-14 16:47:57 -07:00 committed by GitHub
parent a829e04d62
commit f3fe99863d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 98 additions and 1398 deletions

View file

@ -858,20 +858,6 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
return False, ""
def _used_free_parallel(result: str | None) -> bool:
"""True when a web result came from Parallel's free Search MCP.
Only the keyless Parallel path tags its result with ``provider="parallel"``;
the paid REST path and every other provider omit it. Used to label the tool
line "Parallel search" / "Parallel fetch" exactly when the free MCP served
the call.
"""
if not isinstance(result, str) or '"provider"' not in result:
return False
data = safe_json_loads(result)
return isinstance(data, dict) and str(data.get("provider", "")).lower() == "parallel"
def get_cute_tool_message(
tool_name: str, args: dict, duration: float, result: str | None = None,
) -> str:
@ -909,17 +895,15 @@ def get_cute_tool_message(
return f"{line}{failure_suffix}"
if tool_name == "web_search":
verb = "Parallel search" if _used_free_parallel(result) else "search"
return _wrap(f"┊ 🔍 {verb:<9} {_trunc(args.get('query', ''), 42)} {dur}")
return _wrap(f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}")
if tool_name == "web_extract":
verb = "Parallel fetch" if _used_free_parallel(result) else "fetch"
urls = args.get("urls", [])
if urls:
url = urls[0] if isinstance(urls, list) else str(urls)
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
return _wrap(f"┊ 📄 {verb:<9} {_trunc(domain, 35)}{extra} {dur}")
return _wrap(f"┊ 📄 {verb:<9} pages {dur}")
return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}")
return _wrap(f"┊ 📄 fetch pages {dur}")
if tool_name == "terminal":
return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}")
if tool_name == "process":

View file

@ -2182,13 +2182,8 @@ def _toolset_needs_configuration_prompt(
tts_cfg = config.get("tts", {})
return not isinstance(tts_cfg, dict) or "provider" not in tts_cfg
if ts_key == "web":
# Web works out of the box via Parallel's free Search MCP (no key), so
# don't force setup just because ``web.backend`` is unset — only prompt
# when web isn't actually usable (e.g. an explicit backend configured
# without its credentials). Lazy import: web_tools is heavy and most
# tools_config callers don't need it.
from tools.web_tools import check_web_api_key
return not check_web_api_key()
web_cfg = config.get("web", {})
return not isinstance(web_cfg, dict) or "backend" not in web_cfg
if ts_key == "browser":
browser_cfg = config.get("browser", {})
return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg

View file

@ -1,20 +1,14 @@
"""Parallel.ai web search + content extraction — plugin form.
Subclasses :class:`agent.web_search_provider.WebSearchProvider`.
Subclasses :class:`agent.web_search_provider.WebSearchProvider`. Uses two
distinct Parallel SDK clients:
Search runs on one of two transports, picked by credential:
- ``Parallel`` (sync) for :meth:`search`
- ``AsyncParallel`` (async) for :meth:`extract`
- **No key** → the free hosted Search MCP at ``https://search.parallel.ai/mcp``
(anonymous Streamable-HTTP JSON-RPC). This makes ``web_search`` work out of
the box with zero setup, which is why ``parallel`` is the keyless default
backend in :func:`tools.web_tools._get_backend`.
- **``PARALLEL_API_KEY``** → the ``parallel`` SDK's v1 ``search`` / ``extract``
REST endpoints (objective-tuned, mode-selectable, higher rate limits).
Extract mirrors search: keyed uses the async SDK (``AsyncParallel``) v1
``extract``; keyless uses the free MCP's ``web_fetch``. :meth:`extract` is
declared ``async def`` and the dispatcher in
:func:`tools.web_tools.web_extract_tool` detects coroutines via
This is the first plugin to exercise the **async-extract** code path in
the ABC: :meth:`extract` is declared ``async def``, and the dispatcher
in :func:`tools.web_tools.web_extract_tool` detects coroutines via
:func:`inspect.iscoroutinefunction` and awaits.
Config keys this provider responds to::
@ -23,66 +17,25 @@ Config keys this provider responds to::
search_backend: "parallel" # explicit per-capability
extract_backend: "parallel" # explicit per-capability
backend: "parallel" # shared fallback
# Optional: search mode (default "advanced"; also "basic")
# via the PARALLEL_SEARCH_MODE env var. REST path only.
# Optional: search mode (default "agentic"; also "fast" or "one-shot")
# via the PARALLEL_SEARCH_MODE env var.
Env vars::
PARALLEL_API_KEY=... # https://parallel.ai (optional — unlocks
# the v1 REST Search API; without it,
# search and extract use the free MCP)
PARALLEL_SEARCH_MODE=advanced # optional: basic|advanced (legacy
# fast/one-shot map to basic, agentic to
# advanced). REST path only.
PARALLEL_API_KEY=... # https://parallel.ai (required)
PARALLEL_SEARCH_MODE=agentic # optional: agentic|fast|one-shot
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import uuid
from typing import Any, Dict, List
import httpx
from agent.web_search_provider import WebSearchProvider
logger = logging.getLogger(__name__)
# Free hosted Search MCP — anonymous-friendly, used when no PARALLEL_API_KEY is
# configured. Docs: https://docs.parallel.ai/integrations/mcp/search-mcp
_MCP_SEARCH_URL = "https://search.parallel.ai/mcp"
# Protocol version proposed in the ``initialize`` request; also the fallback
# echoed on follow-up requests when the server's reply names no version.
_MCP_PROTOCOL_VERSION = "2025-06-18"
# Deliberately generic client identity. Project policy (see the telemetry PR
# policy in AGENTS.md) forbids third-party usage attribution without an
# explicit user opt-in, so neither clientInfo nor the User-Agent names
# hermes. MCP requires *a* clientInfo; a neutral one satisfies the spec
# without attributing traffic.
_MCP_CLIENT_NAME = "mcp-web-client"
_MCP_CLIENT_VERSION = "1.0.0"
# Sent as the ``User-Agent`` header on every MCP request.
_MCP_USER_AGENT = f"{_MCP_CLIENT_NAME}/{_MCP_CLIENT_VERSION}"
# Timeout (seconds) handed to the ``httpx.Client`` used for the MCP handshake
# and tool call.
_MCP_TIMEOUT_SECONDS = 30.0
# Free-tier attribution. The hosted Search MCP is free to use; surfacing this
# on keyless results credits Parallel and matches the free-tier terms
# (https://parallel.ai/customer-terms).
_FREE_MCP_ATTRIBUTION = (
"Search powered by the free Parallel Web Search MCP (https://parallel.ai)."
)
def _new_session_id() -> str:
    """Return a freshly minted Parallel ``session_id`` for one tool call.

    The id is generated per call instead of once per process: a single process
    can serve many unrelated chats (gateway / batch runners), and a shared id
    would pool their searches into one Parallel session. The prefix is the
    generic client name, so the id carries no hermes attribution (telemetry
    policy).
    """
    token = uuid.uuid4().hex
    return _MCP_CLIENT_NAME + "-" + token
# Module-level note: the canonical cache slots ``_parallel_client`` and
# ``_async_parallel_client`` live on :mod:`tools.web_tools` so tests that do
# ``tools.web_tools._parallel_client = None`` between cases see fresh state.
@ -180,319 +133,11 @@ _get_async_parallel_client = _get_async_client
def _resolve_search_mode() -> str:
"""Return the validated v1 search mode (default "advanced").
V1 collapses the three Beta modes into two. We accept the v1 values
directly and map the legacy Beta values for back-compat with anyone who
still sets ``PARALLEL_SEARCH_MODE=fast|one-shot|agentic``:
- ``fast`` / ``one-shot`` → ``basic`` (lower latency)
- ``agentic`` → ``advanced`` (higher quality, the v1 default)
"""
mode = os.getenv("PARALLEL_SEARCH_MODE", "advanced").lower().strip()
if mode == "basic" or mode in {"fast", "one-shot"}:
return "basic"
# advanced, legacy "agentic", and anything unrecognized → the v1 default.
return "advanced"
# ---------------------------------------------------------------------------
# Free Search MCP transport (keyless path)
# ---------------------------------------------------------------------------
#
# A small hand-rolled Streamable-HTTP JSON-RPC client for the hosted Search
# MCP, rather than the full MCP-client subsystem: we only call two tools
# (``web_search`` / ``web_fetch``), so keeping it inline lets web_search and
# web_extract stay ordinary tools with the MCP endpoint as just their wire
# protocol.
def _mcp_headers(
    session_id: str | None,
    api_key: str | None,
    protocol_version: str | None = None,
) -> Dict[str, str]:
    """Build the HTTP headers for one MCP request.

    Args:
        session_id: Server-issued MCP session id; the ``Mcp-Session-Id``
            header is omitted when falsy.
        api_key: Parallel API key. A Bearer token is attached only when one is
            actually held — the free endpoint is anonymous, and an empty or
            garbage token would make it answer 401 instead of serving the
            anonymous tier.
        protocol_version: Negotiated protocol version. After ``initialize``
            the Streamable-HTTP spec expects it on every follow-up request, so
            it is echoed as ``MCP-Protocol-Version`` once known.

    Returns:
        A new header dict; the three base headers are always present.
    """
    optional_headers = (
        ("Mcp-Session-Id", session_id),
        ("MCP-Protocol-Version", protocol_version),
        ("Authorization", f"Bearer {api_key}" if api_key else None),
    )
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json, text/event-stream",
        "User-Agent": _MCP_USER_AGENT,
    }
    # Only truthy values are sent; insertion order matches the tuple above.
    headers.update({name: value for name, value in optional_headers if value})
    return headers
def _iter_mcp_messages(text: str):
"""Yield JSON-RPC message dicts from a plain-JSON or SSE response body.
Handles ``application/json`` (a single object) and ``text/event-stream``
(SSE: events separated by blank lines; an event's one-or-more ``data:``
lines concatenate into a single JSON payload). Unparseable chunks and
non-``data`` SSE fields (``event:``/``id:``/comments) are skipped.
"""
def _emit(payload):
# Streamable HTTP allows batching responses/notifications into a JSON
# array — flatten so callers always see individual message dicts.
if isinstance(payload, list):
yield from payload
elif payload is not None:
yield payload
body = (text or "").strip()
if not body:
return
if body.startswith("{") or body.startswith("["):
try:
parsed = json.loads(body)
except json.JSONDecodeError:
return
yield from _emit(parsed)
return
data_lines: List[str] = []
def _flush():
if not data_lines:
return None
try:
return json.loads("\n".join(data_lines))
except json.JSONDecodeError:
return None
for raw in body.split("\n"):
line = raw.rstrip("\r")
if line.startswith("data:"):
data_lines.append(line[len("data:"):].lstrip())
elif line.strip() == "": # event boundary
yield from _emit(_flush())
data_lines = []
yield from _emit(_flush())
def _mcp_response_envelope(text: str, request_id: str) -> Dict[str, Any]:
    """Pick the JSON-RPC response for *request_id* out of an MCP response body.

    Streamable-HTTP servers may send progress/log notifications ahead of the
    final answer, so every message in the body is inspected.

    Args:
        text: Raw response body (plain JSON or SSE).
        request_id: The ``id`` sent with the request being answered.

    Returns:
        The first dict message carrying ``result`` or ``error`` whose ``id``
        equals *request_id*. If none matches, the last message carrying
        ``result`` or ``error``; ``{}`` when no such message exists.
    """
    last_seen: Dict[str, Any] = {}
    for message in _iter_mcp_messages(text):
        if not isinstance(message, dict):
            continue
        # Notifications carry neither key and are never a response.
        if "result" not in message and "error" not in message:
            continue
        if message.get("id") == request_id:
            return message
        last_seen = message
    return last_seen
def _mcp_payload(envelope: Dict[str, Any]) -> Dict[str, Any]:
"""Extract the tool result payload from a ``tools/call`` envelope.
Prefers ``structuredContent`` (authoritative machine-readable form);
otherwise scans text blocks for the first JSON-parseable one. Raises on a
JSON-RPC error or a tool-level ``isError``.
"""
if "error" in envelope:
raise RuntimeError(f"Parallel MCP error: {str(envelope['error'])[:500]}")
result = envelope.get("result") or {}
if result.get("isError"):
raise RuntimeError(f"Parallel MCP tool error: {str(result)[:500]}")
structured = result.get("structuredContent")
if isinstance(structured, dict):
return structured
for block in result.get("content", []) or []:
if isinstance(block, dict) and block.get("type") == "text":
text = str(block.get("text") or "")
if not text:
continue
try:
return json.loads(text)
except json.JSONDecodeError:
continue
raise RuntimeError(
f"Parallel MCP returned no parseable content: {str(result)[:500]}"
)
def _mcp_call(
    tool_name: str, arguments: Dict[str, Any], api_key: str | None
) -> Dict[str, Any]:
    """Perform the MCP handshake, then one ``tools/call``, and return its payload.

    Sequence: ``initialize`` (capturing ``Mcp-Session-Id``) →
    ``notifications/initialized`` → ``tools/call`` for *tool_name*.

    Args:
        tool_name: Name of the MCP tool to invoke.
        arguments: Arguments object passed to the tool.
        api_key: Parallel API key; a Bearer token is attached only when set.

    Returns:
        The parsed tool payload dict (see :func:`_mcp_payload`).

    Raises:
        httpx.HTTPStatusError: When ``initialize`` or ``tools/call`` answers
            with an error status (``raise_for_status``).
        RuntimeError: Propagated from :func:`_mcp_payload`.
    """
    with httpx.Client(timeout=_MCP_TIMEOUT_SECONDS) as http:
        # 1. initialize — capture the server-assigned MCP session id.
        init_request_id = str(uuid.uuid4())
        init_body = {
            "jsonrpc": "2.0",
            "id": init_request_id,
            "method": "initialize",
            "params": {
                "protocolVersion": _MCP_PROTOCOL_VERSION,
                "capabilities": {},
                "clientInfo": {
                    "name": _MCP_CLIENT_NAME,
                    "version": _MCP_CLIENT_VERSION,
                },
            },
        }
        init_response = http.post(
            _MCP_SEARCH_URL,
            headers=_mcp_headers(None, api_key),
            json=init_body,
        )
        init_response.raise_for_status()

        # Echo only a session id the server actually issued. Stateless
        # Streamable-HTTP servers may omit it, and sending an invented id on
        # follow-ups can get them rejected. When absent, _mcp_headers simply
        # leaves the Mcp-Session-Id header out. (This is unrelated to the
        # tool-argument ``session_id``, which is a client-minted grouping id.)
        server_session = init_response.headers.get("mcp-session-id")

        # The negotiated protocol version goes on every post-init request, per
        # the Streamable-HTTP spec; fall back to the version we proposed.
        init_envelope = _mcp_response_envelope(init_response.text, init_request_id)
        init_result = init_envelope.get("result") or {}
        protocol = init_result.get("protocolVersion") or _MCP_PROTOCOL_VERSION

        # 2. notifications/initialized — required handshake acknowledgement.
        http.post(
            _MCP_SEARCH_URL,
            headers=_mcp_headers(server_session, api_key, protocol),
            json={"jsonrpc": "2.0", "method": "notifications/initialized"},
        )

        # 3. tools/call.
        call_request_id = str(uuid.uuid4())
        call_body = {
            "jsonrpc": "2.0",
            "id": call_request_id,
            "method": "tools/call",
            "params": {"name": tool_name, "arguments": arguments},
        }
        call_response = http.post(
            _MCP_SEARCH_URL,
            headers=_mcp_headers(server_session, api_key, protocol),
            json=call_body,
        )
        call_response.raise_for_status()
        envelope = _mcp_response_envelope(call_response.text, call_request_id)
        return _mcp_payload(envelope)
def _mcp_web_search(query: str, limit: int, api_key: str | None) -> Dict[str, Any]:
    """Search the web through the hosted Search MCP's ``web_search`` tool.

    The MCP requires ``objective`` (REST treats it as optional), so the query
    is mirrored into both ``objective`` and ``search_queries``. The MCP serves
    a fixed number of results, so *limit* is enforced client-side.

    Args:
        query: Search query text.
        limit: Maximum number of results to keep (at least one is kept).
        api_key: Parallel API key, or ``None`` for the anonymous tier.

    Returns:
        The standard provider search shape
        ``{"success": True, "data": {"web": [...]}}`` plus the ``provider``
        tag and the free-tier ``attribution`` string.
    """
    tool_arguments = {
        "objective": query,
        "search_queries": [query],
        "session_id": _new_session_id(),
    }
    payload = _mcp_call("web_search", tool_arguments, api_key)

    kept = (payload.get("results") or [])[: max(limit, 1)]
    # Positions follow the slot in the trimmed list, so a skipped non-dict
    # entry leaves a gap in the numbering.
    web_results: List[Dict[str, Any]] = [
        {
            "url": entry.get("url") or "",
            "title": entry.get("title") or "",
            "description": " ".join(entry.get("excerpts") or []),
            "position": position,
        }
        for position, entry in enumerate(kept, start=1)
        if isinstance(entry, dict)
    ]

    # Credit the free tier (anonymous path only — keyed search uses REST and
    # carries no attribution).
    return {
        "success": True,
        "data": {"web": web_results},
        "provider": "parallel",
        "attribution": _FREE_MCP_ATTRIBUTION,
    }
def _mcp_web_fetch(urls: List[str], api_key: str | None) -> List[Dict[str, Any]]:
    """Fetch pages through the hosted Search MCP's ``web_fetch`` tool.

    ``full_content=True`` is requested so page bodies are returned in full;
    joined excerpts are used only when the full content is absent.

    Args:
        urls: URLs to fetch.
        api_key: Parallel API key, or ``None`` for the anonymous tier.

    Returns:
        Exactly one row per input URL, in request order and including
        duplicates — the per-URL extract shape used by
        :func:`tools.web_tools.web_extract_tool`. URLs missing from the MCP
        response become error rows.
    """
    payload = _mcp_call(
        "web_fetch",
        {"urls": list(urls), "full_content": True, "session_id": _new_session_id()},
        api_key,
    )

    # Index the response by URL; the first row seen for a URL wins.
    fetched: Dict[str, Dict[str, Any]] = {}
    for row in payload.get("results") or []:
        if not isinstance(row, dict) or not row.get("url"):
            continue
        if row["url"] not in fetched:
            fetched[row["url"]] = row

    def _row_for(url):
        # Build the output row for one requested URL.
        row = fetched.get(url)
        if row is None:
            return {
                "url": url,
                "title": "",
                "content": "",
                "error": "extraction failed (no content returned)",
                "metadata": {"sourceURL": url},
            }
        title = row.get("title") or ""
        # Prefer the full page body; fall back to joined excerpts (mirrors the
        # keyed SDK extract path).
        body = row.get("full_content") or "\n\n".join(row.get("excerpts") or [])
        return {
            "url": url,
            "title": title,
            "content": body,
            "raw_content": body,
            "metadata": {"sourceURL": url, "title": title},
        }

    # Walk the *input* list so duplicates and positional alignment survive.
    return [_row_for(url) for url in urls]
"""Return the validated PARALLEL_SEARCH_MODE value (default "agentic")."""
mode = os.getenv("PARALLEL_SEARCH_MODE", "agentic").lower().strip()
if mode not in {"fast", "one-shot", "agentic"}:
mode = "agentic"
return mode
class ParallelWebSearchProvider(WebSearchProvider):
@ -507,14 +152,7 @@ class ParallelWebSearchProvider(WebSearchProvider):
return "Parallel"
def is_available(self) -> bool:
"""Return True when ``PARALLEL_API_KEY`` is set.
Deliberately key-based: this gates the registry's active-provider walk
and the ``hermes tools`` picker (auto-selecting Parallel for a user who
hasn't named it), so it must not claim availability on the keyless path.
The keyless free-MCP path is reached independently via
:func:`tools.web_tools._get_backend`'s ``parallel`` terminal default.
"""
"""Return True when ``PARALLEL_API_KEY`` is set to a non-empty value."""
return bool(os.getenv("PARALLEL_API_KEY", "").strip())
def supports_search(self) -> bool:
@ -526,11 +164,9 @@ class ParallelWebSearchProvider(WebSearchProvider):
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
"""Execute a Parallel search (sync).
With ``PARALLEL_API_KEY`` set, uses the v1 ``search`` REST endpoint with
the configured mode (``PARALLEL_SEARCH_MODE`` env var, default
"advanced"; limit requested via advanced_settings.max_results, capped at
20). Without a key, falls back to the free hosted Search MCP so search
still works with zero setup.
Uses the ``beta.search`` endpoint with the configured mode
(``PARALLEL_SEARCH_MODE`` env var, default "agentic"). Limit is
capped at 20 server-side.
"""
try:
from tools.interrupt import is_interrupted
@ -538,31 +174,19 @@ class ParallelWebSearchProvider(WebSearchProvider):
if is_interrupted():
return {"success": False, "error": "Interrupted"}
api_key = os.getenv("PARALLEL_API_KEY", "").strip()
if not api_key:
logger.info(
"Parallel search (free MCP): '%s' (limit=%d)", query, limit
)
return _mcp_web_search(query, limit, api_key=None)
mode = _resolve_search_mode()
logger.info(
"Parallel search (v1 REST): '%s' (mode=%s, limit=%d)",
query, mode, limit,
"Parallel search: '%s' (mode=%s, limit=%d)", query, mode, limit
)
# v1 Search API. Request the caller's limit via max_results (capped
# at 20) so we don't rely on the API default — the slice below can
# only trim, not ask for more.
response = _get_sync_client().search(
response = _get_sync_client().beta.search(
search_queries=[query],
objective=query,
mode=mode,
session_id=_new_session_id(),
advanced_settings={"max_results": min(max(limit, 1), 20)},
max_results=min(limit, 20),
)
web_results = []
for i, result in enumerate((response.results or [])[: max(limit, 1)]):
for i, result in enumerate(response.results or []):
excerpts = result.excerpts or []
web_results.append(
{
@ -573,8 +197,6 @@ class ParallelWebSearchProvider(WebSearchProvider):
}
)
# Paid/REST path: no attribution and no "[Parallel]" label — the
# branding is specifically for the free Search MCP tier.
return {"success": True, "data": {"web": web_results}}
except ValueError as exc:
return {"success": False, "error": str(exc)}
@ -590,12 +212,7 @@ class ParallelWebSearchProvider(WebSearchProvider):
async def extract(
self, urls: List[str], **kwargs: Any
) -> List[Dict[str, Any]]:
"""Extract content from one or more URLs.
With ``PARALLEL_API_KEY`` set, uses the async SDK's v1 ``extract`` for
full page content. Without a key, falls back to the free hosted Search
MCP's ``web_fetch`` tool so extraction works with zero setup, mirroring
the keyless search path.
"""Extract content from one or more URLs via the async SDK.
Returns the legacy list-of-results shape that
:func:`tools.web_tools.web_extract_tool` expects: one entry per
@ -610,21 +227,10 @@ class ParallelWebSearchProvider(WebSearchProvider):
{"url": u, "error": "Interrupted", "title": ""} for u in urls
]
api_key = os.getenv("PARALLEL_API_KEY", "").strip()
if not api_key:
logger.info(
"Parallel extract (free MCP web_fetch): %d URL(s)", len(urls)
)
# _mcp_web_fetch is sync httpx; run off the event loop.
return await asyncio.to_thread(_mcp_web_fetch, list(urls), None)
logger.info("Parallel extract (v1 REST): %d URL(s)", len(urls))
# v1 Extract API (client.extract, /v1/extract); full_content is set
# via advanced_settings.
response = await _get_async_client().extract(
logger.info("Parallel extract: %d URL(s)", len(urls))
response = await _get_async_client().beta.extract(
urls=urls,
advanced_settings={"full_content": True},
session_id=_new_session_id(),
full_content=True,
)
results: List[Dict[str, Any]] = []
@ -645,20 +251,13 @@ class ParallelWebSearchProvider(WebSearchProvider):
)
for error in response.errors or []:
err_url = getattr(error, "url", "") or ""
err_msg = (
getattr(error, "message", None)
or getattr(error, "content", None)
or getattr(error, "error_type", None)
or "extraction failed"
)
results.append(
{
"url": err_url,
"url": error.url or "",
"title": "",
"content": "",
"error": err_msg,
"metadata": {"sourceURL": err_url},
"error": error.content or error.error_type or "extraction failed",
"metadata": {"sourceURL": error.url or ""},
}
)
@ -680,16 +279,12 @@ class ParallelWebSearchProvider(WebSearchProvider):
def get_setup_schema(self) -> Dict[str, Any]:
return {
"name": "Parallel",
"badge": "free",
"tag": (
"Free web search + extraction via Parallel's hosted Search MCP "
"— no key needed. Add PARALLEL_API_KEY for the v1 REST Search "
"API (richer modes, higher limits)."
),
"badge": "paid",
"tag": "Objective-tuned search + parallel page extraction.",
"env_vars": [
{
"key": "PARALLEL_API_KEY",
"prompt": "Parallel API key (optional — unlocks the v1 REST Search API)",
"prompt": "Parallel API key",
"url": "https://parallel.ai",
},
],

View file

@ -123,7 +123,7 @@ anthropic = ["anthropic==0.87.0"] # CVE-2026-34450, CVE-2026-34452
# search provider (configured via `hermes tools` or config.yaml).
exa = ["exa-py==2.10.2"]
firecrawl = ["firecrawl-py==4.17.0"]
parallel-web = ["parallel-web==0.6.0"]
parallel-web = ["parallel-web==0.4.2"]
# Image generation backends
fal = ["fal-client==0.13.1"]
# Edge TTS — default TTS provider but still optional (users can pick

View file

@ -12,7 +12,6 @@ from agent.display import (
set_tool_preview_max_len,
_render_inline_unified_diff,
_summarize_rendered_diff_sections,
_used_free_parallel,
render_edit_diff_with_delta,
)
@ -172,46 +171,6 @@ class TestCuteToolMessagePreviewLength:
assert "[error]" not in line
class TestWebProviderLabel:
    """Tool-line verbs: "Parallel search"/"Parallel fetch" only on the free path."""

    def test_free_search_verb_is_parallel(self):
        payload = {"success": True, "data": {"web": []}, "provider": "parallel"}
        rendered = get_cute_tool_message(
            "web_search", {"query": "hello"}, 0.1, result=json.dumps(payload)
        )
        for expected in ("Parallel search", "hello"):
            assert expected in rendered

    def test_paid_search_verb_is_plain(self):
        payload = {"success": True, "data": {"web": [{"url": "u"}]}}
        rendered = get_cute_tool_message(
            "web_search", {"query": "hi"}, 0.1, result=json.dumps(payload)
        )
        assert "Parallel" not in rendered
        assert "search" in rendered

    def test_missing_result_verb_is_plain(self):
        rendered = get_cute_tool_message("web_search", {"query": "hello"}, 0.1)
        assert "Parallel" not in rendered
        assert "search" in rendered

    def test_helper_is_parallel_free_specific(self):
        # Only Parallel's free MCP path marks results; nothing else does.
        assert _used_free_parallel(json.dumps({"provider": "parallel"})) is True
        not_free = (
            json.dumps({"provider": "exa"}),
            json.dumps({"provider": "firecrawl"}),
            json.dumps({"success": True, "data": {}}),
            'not json',
            None,
        )
        for candidate in not_free:
            assert _used_free_parallel(candidate) is False

    def test_free_extract_verb_is_parallel(self):
        payload = {"results": [{"url": "u", "content": "x"}], "provider": "parallel"}
        rendered = get_cute_tool_message(
            "web_extract", {"urls": ["https://a.test"]}, 0.1, result=json.dumps(payload)
        )
        assert "Parallel fetch" in rendered

    def test_paid_extract_verb_is_plain(self):
        payload = {"results": [{"url": "u", "content": "x"}]}
        rendered = get_cute_tool_message(
            "web_extract", {"urls": ["https://a.test"]}, 0.1, result=json.dumps(payload)
        )
        assert "Parallel" not in rendered
class TestEditDiffPreview:
def test_extract_edit_diff_for_patch(self):
diff = extract_edit_diff("patch", '{"success": true, "diff": "--- a/x\\n+++ b/x\\n"}')

View file

@ -975,19 +975,6 @@ def test_toolset_has_keys_treats_no_key_providers_as_configured():
assert _toolset_has_keys("computer_use", config) is True
def test_web_no_prompt_when_usable_keyless():
    """A fresh install must not be pushed into provider setup for web.

    With ``check_web_api_key`` reporting the web tools as usable (the free
    Parallel MCP case), enabling the web toolset needs no configuration prompt.
    """
    with patch("tools.web_tools.check_web_api_key", return_value=True):
        needs_prompt = _toolset_needs_configuration_prompt("web", {})
    assert needs_prompt is False
def test_web_no_prompt_when_extract_backend_is_extract_capable():
    """No prompt when only ``extract_backend`` is set and web is reported usable."""
    config = {"web": {"extract_backend": "parallel"}}
    with patch("tools.web_tools.check_web_api_key", return_value=True):
        needs_prompt = _toolset_needs_configuration_prompt("web", config)
    assert needs_prompt is False
def test_computer_use_needs_configuration_when_cua_driver_post_setup_pending():
"""No-key providers can still need setup when their post_setup is unsatisfied.

View file

@ -1,383 +0,0 @@
"""Keyless Parallel search via the free hosted Search MCP.
Covers the transport added in ``plugins/web/parallel/provider.py`` that lets
``web_search`` work with no ``PARALLEL_API_KEY``:
- ``_mcp_headers`` — Bearer attached only when a key is held
- ``_mcp_response_envelope`` — plain-JSON and SSE (``data:``) response bodies
- ``_mcp_payload`` — structuredContent preferred, text-block JSON fallback, errors
- ``_mcp_web_search`` — full handshake (mocked transport) → standard search shape
- ``ParallelWebSearchProvider.search`` — keyless path routes to the MCP
"""
from __future__ import annotations
import asyncio
import json
from unittest.mock import patch
import pytest
import plugins.web.parallel.provider as pp
# ─── _mcp_headers ──────────────────────────────────────────────────────────
class TestMcpHeaders:
    """Header construction for anonymous and keyed MCP requests."""

    def test_anonymous_has_no_authorization(self):
        headers = pp._mcp_headers(session_id=None, api_key=None)
        for absent in ("Authorization", "Mcp-Session-Id"):
            assert absent not in headers
        assert headers["Accept"] == "application/json, text/event-stream"

    def test_user_agent_is_generic_not_hermes(self):
        # Telemetry policy: no third-party usage attribution without opt-in.
        # The UA must be set (not python-httpx default) but must not name
        # hermes, on both the anonymous and keyed paths.
        expected = f"{pp._MCP_CLIENT_NAME}/{pp._MCP_CLIENT_VERSION}"
        credential_sets = (
            {"session_id": None, "api_key": None},
            {"session_id": "sid", "api_key": "pk-live"},
        )
        for kwargs in credential_sets:
            user_agent = pp._mcp_headers(**kwargs)["User-Agent"]
            assert user_agent == expected
            assert "hermes" not in user_agent.lower()

    def test_session_id_and_bearer_when_present(self):
        headers = pp._mcp_headers(session_id="sid-123", api_key="pk-live")
        assert headers["Mcp-Session-Id"] == "sid-123"
        assert headers["Authorization"] == "Bearer pk-live"
# ─── SSE / JSON-RPC parsing ──────────────────────────────────────────────────
class TestMcpResponseParsing:
    """Selecting the right JSON-RPC response from plain-JSON and SSE bodies."""

    def test_plain_json_matched_by_id(self):
        envelope = pp._mcp_response_envelope(
            '{"jsonrpc":"2.0","id":"abc","result":{"ok":true}}', "abc"
        )
        assert envelope["result"]["ok"] is True

    def test_sse_selects_response_for_request_id_skipping_notifications(self):
        # A progress notification (no id) precedes the real result; an unrelated
        # response id is also present. We must pick the one matching our id.
        events = [
            'event: message\ndata: {"jsonrpc":"2.0","method":"notifications/progress","params":{"p":1}}\n\n',
            'event: message\ndata: {"jsonrpc":"2.0","id":"other","result":{"ok":false}}\n\n',
            'event: message\ndata: {"jsonrpc":"2.0","id":"req-1","result":{"ok":true}}\n\n',
        ]
        envelope = pp._mcp_response_envelope("".join(events), "req-1")
        assert envelope["result"]["ok"] is True

    def test_sse_multiline_data_concatenated(self):
        envelope = pp._mcp_response_envelope(
            'data: {"jsonrpc":"2.0","id":"x",\ndata: "result":{"n":42}}\n\n', "x"
        )
        assert envelope["result"]["n"] == 42

    def test_falls_back_to_last_result_when_id_absent(self):
        # request id doesn't match, but there's a single result → use it
        envelope = pp._mcp_response_envelope(
            '{"jsonrpc":"2.0","id":"server-chose","result":{"ok":true}}', "mismatch"
        )
        assert envelope["result"]["ok"] is True

    def test_empty_body(self):
        for blank in ("", " "):
            assert pp._mcp_response_envelope(blank, "x") == {}

    def test_batched_json_array_flattened(self):
        # Streamable HTTP may batch messages into a JSON array.
        batch = ('[{"jsonrpc":"2.0","method":"notifications/progress"},'
                 '{"jsonrpc":"2.0","id":"req-9","result":{"ok":true}}]')
        envelope = pp._mcp_response_envelope(batch, "req-9")
        assert envelope["result"]["ok"] is True

    def test_batched_sse_data_array_flattened(self):
        envelope = pp._mcp_response_envelope(
            'data: [{"jsonrpc":"2.0","id":"a","result":{"n":1}}]\n\n', "a"
        )
        assert envelope["result"]["n"] == 1
# ─── _mcp_payload ────────────────────────────────────────────────────────────
class TestMcpPayload:
    """Payload extraction: structured content, text-block JSON, error paths."""

    def test_prefers_structured_content(self):
        structured = {"results": [{"url": "u"}]}
        envelope = {
            "result": {
                "structuredContent": {"results": [{"url": "u"}]},
                "content": [{"type": "text", "text": "ignored"}],
            }
        }
        assert pp._mcp_payload(envelope) == structured

    def test_parses_text_block_json(self):
        tool_output = {"search_id": "s1", "results": [{"url": "u", "title": "t"}]}
        text_block = {"type": "text", "text": json.dumps(tool_output)}
        payload = pp._mcp_payload({"result": {"content": [text_block]}})
        assert payload["search_id"] == "s1"

    def test_raises_on_jsonrpc_error(self):
        failing = {"error": {"code": -32000, "message": "boom"}}
        with pytest.raises(RuntimeError, match="Parallel MCP error"):
            pp._mcp_payload(failing)

    def test_raises_on_tool_iserror(self):
        failing = {"result": {"isError": True, "content": []}}
        with pytest.raises(RuntimeError, match="Parallel MCP tool error"):
            pp._mcp_payload(failing)
# ─── _mcp_web_search (mocked transport) ──────────────────────────────────────
class _FakeResponse:
def __init__(self, *, text="", headers=None):
self.text = text
self.headers = headers or {}
def raise_for_status(self):
return None
class _FakeClient:
"""Stands in for httpx.Client: replays init → ack → tools/call."""
def __init__(self, search_payload, init_session_id="server-sid"):
self._search_payload = search_payload
self._init_session_id = init_session_id
self.calls = []
def __enter__(self):
return self
def __exit__(self, *exc):
return False
def post(self, url, headers=None, json=None):
self.calls.append({"headers": headers, "json": json})
req = json or {}
method = req.get("method")
req_id = req.get("id")
if method == "initialize":
# Echo the request id, as the real server does.
return _FakeResponse(
text=json_dumps({"jsonrpc": "2.0", "id": req_id,
"result": {"protocolVersion": "2099-01-01"}}),
headers=(
{"mcp-session-id": self._init_session_id}
if self._init_session_id is not None
else {}
),
)
if method == "notifications/initialized":
return _FakeResponse(text="")
# tools/call
envelope = {"jsonrpc": "2.0", "id": req_id, "result": {
"content": [{"type": "text", "text": json_dumps(self._search_payload)}],
}}
return _FakeResponse(text=json_dumps(envelope))
def json_dumps(obj):
    """Return *obj* serialised as JSON text.

    ``_FakeClient.post`` takes a parameter named ``json``, which hides the
    ``json`` module inside that method; this wrapper keeps serialisation
    reachable from there.
    """
    serialized = json.dumps(obj)
    return serialized
class TestMcpWebSearch:
    """Drives ``pp._mcp_web_search`` with ``pp.httpx.Client`` swapped for ``_FakeClient``."""

    def _payload(self, n):
        """Build a search payload holding *n* results, each with two excerpts."""
        results = []
        for i in range(n):
            results.append({
                "url": f"https://ex/{i}",
                "title": f"t{i}",
                "excerpts": [f"a{i}", f"b{i}"],
            })
        return {"search_id": "s", "results": results}

    def test_returns_standard_shape_and_handshake(self):
        client = _FakeClient(self._payload(3))
        with patch.object(pp.httpx, "Client", return_value=client):
            out = pp._mcp_web_search("hello", limit=5, api_key=None)

        assert out["success"] is True
        # Free-tier results credit Parallel.
        assert "Parallel" in out["attribution"]

        hits = out["data"]["web"]
        assert [hit["position"] for hit in hits] == [1, 2, 3]
        first_hit = hits[0]
        assert first_hit["url"] == "https://ex/0"
        assert first_hit["description"] == "a0 b0"  # excerpts joined

        # Requests must go out in handshake order.
        sent_methods = [call["json"].get("method") for call in client.calls]
        assert sent_methods == ["initialize", "notifications/initialized", "tools/call"]
        # The session id from the initialize response header is reused.
        assert client.calls[-1]["headers"]["Mcp-Session-Id"] == "server-sid"

    def test_stateless_server_no_session_header_not_invented(self):
        # A stateless Streamable-HTTP server may omit mcp-session-id on
        # initialize; we must NOT invent one (sending an unissued session id can
        # get follow-up requests rejected). The follow-ups carry no header.
        client = _FakeClient(self._payload(1), init_session_id=None)
        with patch.object(pp.httpx, "Client", return_value=client):
            out = pp._mcp_web_search("hello", limit=5, api_key=None)

        assert out["success"] is True
        later_calls = [
            call for call in client.calls
            if call["json"].get("method") != "initialize"
        ]
        assert later_calls, "expected notifications/initialized + tools/call"
        assert all("Mcp-Session-Id" not in call["headers"] for call in later_calls)
        # Anonymous → no Authorization on any call.
        assert all("Authorization" not in call["headers"] for call in client.calls)
        # tools/call mirrors the query into objective + search_queries.
        tool_args = client.calls[-1]["json"]["params"]["arguments"]
        assert tool_args["objective"] == "hello"
        assert tool_args["search_queries"] == ["hello"]

    def test_limit_is_applied_client_side(self):
        client = _FakeClient(self._payload(10))
        with patch.object(pp.httpx, "Client", return_value=client):
            out = pp._mcp_web_search("q", limit=2, api_key=None)
        returned = out["data"]["web"]
        assert len(returned) == 2

    def test_bearer_attached_when_key_present(self):
        client = _FakeClient(self._payload(1))
        with patch.object(pp.httpx, "Client", return_value=client):
            pp._mcp_web_search("q", limit=1, api_key="pk-live")
        assert all(
            call["headers"]["Authorization"] == "Bearer pk-live"
            for call in client.calls
        )

    def test_negotiated_protocol_version_echoed_post_init(self):
        client = _FakeClient(self._payload(1))
        with patch.object(pp.httpx, "Client", return_value=client):
            pp._mcp_web_search("q", limit=1, api_key=None)

        negotiated = "2099-01-01"
        # The initialize request doesn't carry the (not-yet-negotiated) version...
        assert "MCP-Protocol-Version" not in client.calls[0]["headers"]
        # ...but notifications/initialized and tools/call echo the negotiated one.
        assert client.calls[1]["headers"]["MCP-Protocol-Version"] == negotiated
        assert client.calls[-1]["headers"]["MCP-Protocol-Version"] == negotiated
# ─── provider.search keyless routing ─────────────────────────────────────────
class TestProviderKeylessSearch:
    """Provider-level checks with ``PARALLEL_API_KEY`` absent (and present)."""

    def test_search_without_key_uses_mcp(self, monkeypatch):
        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
        seen = {}

        def _record(query, limit, api_key):
            seen["query"] = query
            seen["limit"] = limit
            seen["api_key"] = api_key
            return {"success": True, "data": {"web": []}}

        monkeypatch.setattr(pp, "_mcp_web_search", _record)
        provider = pp.ParallelWebSearchProvider()
        out = provider.search("kittens", limit=4)

        assert out["success"] is True
        assert seen == {"query": "kittens", "limit": 4, "api_key": None}

    def test_is_available_reflects_key(self, monkeypatch):
        # is_available() gates the registry's active-provider walk + picker, so
        # it's key-based (keyless dispatch is handled by _get_backend, not this).
        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
        without_key = pp.ParallelWebSearchProvider().is_available()
        assert without_key is False

        monkeypatch.setenv("PARALLEL_API_KEY", "k")
        with_key = pp.ParallelWebSearchProvider().is_available()
        assert with_key is True
# ─── web_fetch (keyless extract) ─────────────────────────────────────────────
class TestMcpWebFetch:
    """Drives ``pp._mcp_web_fetch`` (and the provider's ``extract``) without a key."""

    def _payload(self, urls):
        """Build an extract payload with one two-excerpt result per URL."""
        rows = []
        for idx, link in enumerate(urls):
            rows.append({
                "url": link,
                "title": f"T{idx}",
                "publish_date": None,
                "excerpts": [f"chunk-a-{idx}", f"chunk-b-{idx}"],
            })
        return {"extract_id": "e1", "results": rows}

    def test_maps_to_extract_shape(self):
        urls = ["https://a.test", "https://b.test"]
        client = _FakeClient(self._payload(urls))
        with patch.object(pp.httpx, "Client", return_value=client):
            out = pp._mcp_web_fetch(urls, api_key=None)

        assert [row["url"] for row in out] == urls
        first = out[0]
        assert first["content"] == "chunk-a-0\n\nchunk-b-0"
        assert first["raw_content"] == first["content"]
        assert first["metadata"] == {"sourceURL": "https://a.test", "title": "T0"}

        # tools/call targeted web_fetch, requesting full page bodies.
        call_params = client.calls[-1]["json"]["params"]
        assert call_params["name"] == "web_fetch"
        tool_args = call_params["arguments"]
        assert tool_args["urls"] == urls
        assert tool_args["full_content"] is True
        assert tool_args["session_id"].startswith(f"{pp._MCP_CLIENT_NAME}-")

    def test_prefers_full_content_over_excerpts(self):
        single_result = {
            "url": "https://a.test",
            "title": "T",
            "excerpts": ["snippet"],
            "full_content": "the entire page body",
        }
        client = _FakeClient({"results": [single_result]})
        with patch.object(pp.httpx, "Client", return_value=client):
            out = pp._mcp_web_fetch(["https://a.test"], api_key=None)
        assert out[0]["content"] == "the entire page body"

    def test_missing_url_becomes_error_entry(self):
        # Server returns only one of the two requested URLs.
        client = _FakeClient(self._payload(["https://a.test"]))
        requested = ["https://a.test", "https://missing.test"]
        with patch.object(pp.httpx, "Client", return_value=client):
            out = pp._mcp_web_fetch(requested, api_key=None)

        assert len(out) == 2
        unmatched = [row for row in out if row["url"] == "https://missing.test"]
        missing = unmatched[0]
        assert "error" in missing
        assert missing["content"] == ""

    def test_preserves_order_and_duplicate_inputs(self):
        # MCP returns each unique URL once; output must still be one row per
        # input, in order, including the duplicate.
        client = _FakeClient(self._payload(["https://a.test", "https://b.test"]))
        urls = ["https://b.test", "https://a.test", "https://b.test"]
        with patch.object(pp.httpx, "Client", return_value=client):
            out = pp._mcp_web_fetch(urls, api_key=None)

        assert [row["url"] for row in out] == urls  # one row per input, in order
        assert all("error" not in row for row in out)  # all three resolved

    def test_extract_without_key_uses_web_fetch(self, monkeypatch):
        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
        seen = {}

        def _record(urls, api_key):
            seen["urls"] = list(urls)
            seen["api_key"] = api_key
            row = {
                "url": urls[0],
                "title": "",
                "content": "x",
                "raw_content": "x",
                "metadata": {},
            }
            return [row]

        monkeypatch.setattr(pp, "_mcp_web_fetch", _record)
        provider = pp.ParallelWebSearchProvider()
        out = asyncio.run(provider.extract(["https://x.test"]))

        assert out[0]["content"] == "x"
        assert seen == {"urls": ["https://x.test"], "api_key": None}
# ─── keyed v1 REST search ────────────────────────────────────────────────────
class TestKeyedV1Search:
    """With ``PARALLEL_API_KEY`` set, ``search`` goes through the stubbed sync client."""

    def test_passes_max_results_and_omits_branding(self, monkeypatch):
        monkeypatch.setenv("PARALLEL_API_KEY", "pk-live")
        monkeypatch.delenv("PARALLEL_SEARCH_MODE", raising=False)
        search_kwargs = {}

        class _Res:
            """One fake search hit."""

            def __init__(self, url):
                self.url = url
                self.title = "T"
                self.excerpts = ["x"]

        class _Resp:
            """Fake response carrying ten hits, more than the requested limit."""

            results = [_Res(f"https://r/{i}") for i in range(10)]

        class _Client:
            """Fake sync client that records the keyword arguments of ``search``."""

            def search(self, **kw):
                search_kwargs.update(kw)
                return _Resp()

        monkeypatch.setattr(pp, "_get_sync_client", lambda: _Client())
        provider = pp.ParallelWebSearchProvider()
        out = provider.search("q", limit=7)

        assert out["success"] is True
        # Honors the caller's limit via advanced_settings.max_results.
        assert search_kwargs["advanced_settings"] == {"max_results": 7}
        assert search_kwargs["mode"] == "advanced"  # v1 default
        # Per-call session id.
        assert search_kwargs["session_id"].startswith(f"{pp._MCP_CLIENT_NAME}-")
        assert len(out["data"]["web"]) == 7  # client-side slice
        # Paid path: no free-tier attribution, no [Parallel] label signal.
        assert "attribution" not in out
        assert "provider" not in out
# ─── v1 search mode mapping ──────────────────────────────────────────────────
class TestResolveSearchMode:
    """Table-driven check of ``pp._resolve_search_mode`` against ``PARALLEL_SEARCH_MODE``."""

    @pytest.mark.parametrize("env,expected", [
        (None, "advanced"),         # default
        ("advanced", "advanced"),
        ("basic", "basic"),
        ("fast", "basic"),          # legacy → basic
        ("one-shot", "basic"),      # legacy → basic
        ("agentic", "advanced"),    # legacy → advanced
        ("garbage", "advanced"),    # invalid → default
        ("BASIC", "basic"),         # case-insensitive
    ])
    def test_mode_mapping(self, monkeypatch, env, expected):
        # ``None`` in the table means "variable not set at all".
        if env is not None:
            monkeypatch.setenv("PARALLEL_SEARCH_MODE", env)
        else:
            monkeypatch.delenv("PARALLEL_SEARCH_MODE", raising=False)
        resolved = pp._resolve_search_mode()
        assert resolved == expected

View file

@ -193,16 +193,11 @@ class TestIsAvailable:
assert p.is_available() is True
def test_parallel_requires_api_key(self, monkeypatch: pytest.MonkeyPatch) -> None:
"""is_available() is key-based — it gates the registry's active-provider
walk/picker. (Keyless search/extract still work via the free MCP through
_get_backend's terminal default, independent of this flag.)
"""
_ensure_plugins_loaded()
from agent.web_search_registry import get_provider
p = get_provider("parallel")
assert p is not None
monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
assert p.is_available() is False
monkeypatch.setenv("PARALLEL_API_KEY", "real")
assert p.is_available() is True
@ -427,33 +422,17 @@ class TestErrorResponseShapes:
assert result.get("success") is False
assert "error" in result
def test_parallel_extract_keyless_uses_mcp_web_fetch(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Without a key, extract routes to the free MCP web_fetch tool rather
than erroring. The MCP transport is mocked so the test stays offline."""
def test_parallel_extract_returns_per_url_errors_when_unconfigured(self) -> None:
_ensure_plugins_loaded()
from agent.web_search_registry import get_provider
import plugins.web.parallel.provider as parallel_provider
monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
captured = {}
def _fake_fetch(urls, api_key):
captured["urls"] = list(urls)
captured["api_key"] = api_key
return [{"url": urls[0], "title": "Example", "content": "body",
"raw_content": "body", "metadata": {"sourceURL": urls[0]}}]
monkeypatch.setattr(parallel_provider, "_mcp_web_fetch", _fake_fetch)
p = get_provider("parallel")
assert p is not None
result = asyncio.run(p.extract(["https://example.com"]))
assert isinstance(result, list)
assert len(result) == 1
assert "error" in result[0]
assert result[0]["url"] == "https://example.com"
assert result[0]["content"] == "body"
assert captured == {"urls": ["https://example.com"], "api_key": None}
def test_firecrawl_extract_returns_per_url_errors_when_unconfigured(self) -> None:
_ensure_plugins_loaded()

View file

@ -1,100 +0,0 @@
"""Regression: the keyless Parallel web default must survive a failed sweep.
``web_search`` / ``web_extract`` are documented to work out of the box with
zero setup via the bundled keyless Parallel free-MCP backend. That guarantee
only holds if the bundled ``plugins/web/*`` providers are registered in
``agent.web_search_registry``. The dispatch triggers the general plugin sweep
(:func:`hermes_cli.plugins._ensure_plugins_discovered`) to do that, but the
sweep can finish without registering them (its exception swallowed as a
warning, a packaged layout where it ran before the bundled tree was
importable, or a stale empty-discovery cache). When that happened, *both*
tools dead-ended on "No web {search,extract} provider configured" even though
no setup should be needed.
These tests pin the invariant that :func:`tools.web_tools._ensure_web_plugins_loaded`
guarantees the keyless default is registered regardless of the sweep's outcome,
and that the direct-registration fallback honors an explicit ``plugins.disabled``
entry. Real imports from the bundled plugin modules; no provider mocking.
"""
from __future__ import annotations
import pytest
import agent.web_search_registry as reg
import hermes_cli.plugins as plugins
from tools import web_tools
@pytest.fixture(autouse=True)
def _clean_registry():
    """Autouse fixture: call ``reg._reset_for_tests()`` before and after each test."""
    reg._reset_for_tests()
    # Hand control to the test; the statement after ``yield`` is the teardown.
    yield
    reg._reset_for_tests()
def _boom(*_a, **_k):
raise RuntimeError("discovery boom")
def test_keyless_default_registered_when_discovery_raises(monkeypatch):
    """A swallowed discovery failure must not strand the keyless default."""
    monkeypatch.setattr(plugins, "_ensure_plugins_discovered", _boom)
    # Precondition: the registry starts out without a parallel provider.
    before = reg.get_provider("parallel")
    assert before is None

    web_tools._ensure_web_plugins_loaded()

    parallel = reg.get_provider("parallel")
    assert parallel is not None, "keyless Parallel default not restored"
    # It is the universal keyless default precisely because it does both.
    assert parallel.supports_search()
    assert parallel.supports_extract()
def test_fallback_registers_full_bundled_set(monkeypatch):
    """The fix covers the whole bundled provider class, not just parallel."""
    monkeypatch.setattr(plugins, "_ensure_plugins_discovered", _boom)
    web_tools._ensure_web_plugins_loaded()

    registered = set()
    for provider in reg.list_providers():
        registered.add(provider.name)

    # Every bundled backend a user might have configured should be reachable
    # again, so an explicit ``web.extract_backend: firecrawl`` etc. resolves.
    for expected in ("parallel", "firecrawl", "tavily", "exa"):
        assert expected in registered, f"{expected} missing after fallback"
def test_fallback_honors_explicit_disable(monkeypatch):
    """A backend the user turned off via plugins.disabled stays off."""
    monkeypatch.setattr(plugins, "_get_disabled_plugins", lambda: {"web-parallel"})
    web_tools._register_bundled_web_providers_directly()

    registered = {provider.name for provider in reg.list_providers()}
    assert "parallel" not in registered, "explicit disable was ignored"
    # Other bundled backends are unaffected by the parallel disable.
    assert "tavily" in registered
def test_fallback_is_noop_when_discovery_already_registered(monkeypatch):
    """Healthy path: don't pay for the direct sweep when parallel is present."""
    import importlib

    class _Ctx:
        """Minimal plugin context that forwards registrations to the registry."""

        def register_web_search_provider(self, provider):
            reg.register_provider(provider)

    # Pretend the general sweep already registered the keyless default.
    parallel_plugin = importlib.import_module("plugins.web.parallel")
    parallel_plugin.register(_Ctx())
    monkeypatch.setattr(plugins, "_ensure_plugins_discovered", lambda *a, **k: None)

    # Wrap the direct-registration fallback so invocations can be counted.
    calls = {"n": 0}
    original_fallback = web_tools._register_bundled_web_providers_directly

    def _spy():
        calls["n"] = calls["n"] + 1
        original_fallback()

    monkeypatch.setattr(web_tools, "_register_bundled_web_providers_directly", _spy)
    web_tools._ensure_web_plugins_loaded()
    assert calls["n"] == 0, "direct-registration ran on the healthy path"

View file

@ -167,21 +167,6 @@ class TestPerCapabilityBackendSelection:
monkeypatch.setenv("TAVILY_API_KEY", "test-key")
assert web_tools._get_search_backend() == "tavily"
def test_explicit_extract_backend_honored_when_unavailable(self, monkeypatch):
"""An explicit per-capability backend is honored even with no creds, so
its setup error surfaces instead of silently rerouting to the keyless
Parallel default (which would send user URLs to a different provider)."""
from tools import web_tools
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
"extract_backend": "firecrawl",
})
for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "FIRECRAWL_GATEWAY_URL"):
monkeypatch.delenv(key, raising=False)
monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False, raising=False)
# Resolves to firecrawl (not parallel) despite firecrawl being unavailable.
assert web_tools._get_extract_backend() == "firecrawl"
def test_falls_back_to_generic_backend_when_extract_backend_empty(self, monkeypatch):
from tools import web_tools
@ -192,7 +177,7 @@ class TestPerCapabilityBackendSelection:
monkeypatch.setenv("PARALLEL_API_KEY", "test-key")
assert web_tools._get_extract_backend() == "parallel"
def test_explicit_search_backend_honored_when_unavailable(self, monkeypatch):
def test_search_backend_ignored_when_not_available(self, monkeypatch):
from tools import web_tools
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
@ -201,10 +186,8 @@ class TestPerCapabilityBackendSelection:
})
monkeypatch.delenv("EXA_API_KEY", raising=False)
monkeypatch.setenv("FIRECRAWL_API_KEY", "fc-key")
# The explicit per-capability choice (exa) is honored even though it's
# unavailable, so its setup error surfaces — we don't silently reroute
# to the shared backend (or the keyless Parallel default).
assert web_tools._get_search_backend() == "exa"
# Should fall back to firecrawl since exa isn't configured
assert web_tools._get_search_backend() == "firecrawl"
def test_fully_backward_compatible_with_web_backend_only(self, monkeypatch):
from tools import web_tools
@ -308,55 +291,26 @@ class TestUnconfiguredErrorEnvelopeParity:
):
monkeypatch.delenv(k, raising=False)
def test_extract_empty_urls_does_not_raise(self, monkeypatch):
"""Regression: empty (or fully SSRF-blocked) URL sets skip the dispatch
branch; the free-Parallel flag must still be initialized so the tool
returns an error envelope instead of UnboundLocalError."""
import asyncio
from tools import web_tools
self._clear_web_creds(monkeypatch)
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
out = asyncio.run(web_tools.web_extract_tool([], "markdown"))
# The key assertion is that it returns a normal error envelope (a
# string) rather than raising UnboundLocalError.
assert isinstance(out, str)
result = json.loads(out)
assert "error" in result
def test_unconfigured_search_falls_back_to_free_parallel(self, monkeypatch):
"""``web_search_tool`` with no creds routes to Parallel's free Search
MCP rather than erroring. The MCP transport is mocked so the test
stays offline; we assert dispatch landed on parallel and returned the
standard search envelope.
def test_unconfigured_search_emits_top_level_error(self, monkeypatch):
"""``web_search_tool`` with no creds returns ``{"error": "Error searching web: ..."}``
matching main's ``tool_error()`` envelope, not a per-result shape.
"""
from tools import web_tools
import plugins.web.parallel.provider as parallel_provider
self._clear_web_creds(monkeypatch)
# Reset firecrawl client cache so the unconfigured state is re-evaluated
monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: False)
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
captured = {}
def _fake_mcp(query, limit, api_key):
captured["query"] = query
captured["api_key"] = api_key
return {
"success": True,
"data": {"web": [
{"url": "https://example.com", "title": "Example",
"description": "hit", "position": 1},
]},
}
monkeypatch.setattr(parallel_provider, "_mcp_web_search", _fake_mcp)
result = json.loads(web_tools.web_search_tool("hello world", limit=3))
assert result.get("success") is True, f"expected success, got {result}"
assert result["data"]["web"][0]["url"] == "https://example.com"
# Keyless path: dispatched to parallel with no Bearer token.
assert captured == {"query": "hello world", "api_key": None}
assert "error" in result, f"expected top-level 'error' key, got {result}"
# ``Error searching web:`` prefix comes from web_tools' top-level except handler
assert "Error searching web:" in result["error"]
assert "FIRECRAWL_API_KEY" in result["error"]
# No per-result burying
assert "results" not in result
class TestDispatchersTriggerPluginDiscovery:

View file

@ -190,11 +190,7 @@ class TestDDGSBackendWiring:
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
assert web_tools._get_backend() == "exa"
def test_auto_detect_prefers_keyless_parallel_over_ddgs(self, monkeypatch):
# With no credentials, keyless Parallel is the auto-detect default even
# when the ddgs package is installed — ddgs is search-only (can't
# extract), so Parallel is preferred so both search and extract work.
# ddgs remains reachable via an explicit web.backend=ddgs.
def test_auto_detect_picks_ddgs_as_last_resort(self, monkeypatch):
from tools import web_tools
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "PARALLEL_API_KEY",
@ -202,7 +198,7 @@ class TestDDGSBackendWiring:
monkeypatch.delenv(key, raising=False)
monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
assert web_tools._get_backend() == "parallel"
assert web_tools._get_backend() == "ddgs"
def test_check_web_api_key_true_when_ddgs_configured(self, monkeypatch):
from tools import web_tools

View file

@ -313,9 +313,7 @@ class TestCheckWebApiKey:
)
assert web_tools.check_web_api_key() is True
def test_no_credentials_usable_via_free_parallel(self, monkeypatch):
"""No credentials → check_web_api_key True: the keyless Parallel free MCP
services calls, so web is usable out of the box."""
def test_no_credentials_fails(self, monkeypatch):
from tools import web_tools
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False)
@ -327,7 +325,7 @@ class TestCheckWebApiKey:
monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: False)
assert web_tools.check_web_api_key() is True
assert web_tools.check_web_api_key() is False
# ---------------------------------------------------------------------------

View file

@ -384,14 +384,12 @@ class TestBackendSelection:
patch.dict(os.environ, {"FIRECRAWL_API_KEY": "fc-test"}):
assert _get_backend() == "firecrawl"
def test_fallback_no_keys_defaults_to_parallel(self):
"""No credentials, no config → 'parallel' (free Search MCP works
keyless). Selection is purely credential-based."""
def test_fallback_no_keys_defaults_to_firecrawl(self):
"""No keys, no config → 'firecrawl' (will fail at client init)."""
from tools.web_tools import _get_backend
with patch("tools.web_tools._load_web_config", return_value={}), \
patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
patch("tools.web_tools._ddgs_package_importable", return_value=False):
assert _get_backend() == "parallel"
assert _get_backend() == "firecrawl"
def test_invalid_config_falls_through_to_fallback(self):
"""web.backend=invalid → ignored, uses key-based fallback."""
@ -626,73 +624,9 @@ class TestCheckWebApiKey:
from tools.web_tools import check_web_api_key
assert check_web_api_key() is True
def test_no_keys_usable_via_free_parallel(self):
"""No credentials → check_web_api_key True: selection resolves to the
keyless Parallel free MCP, which genuinely services calls (web works out
of the box). check_web_api_key is a usability probe, not a key check."""
def test_no_keys_returns_false(self):
from tools.web_tools import check_web_api_key
with patch("tools.web_tools._load_web_config", return_value={}), \
patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
patch("tools.web_tools._ddgs_package_importable", return_value=False), \
patch.dict(os.environ, {}, clear=False):
for k in ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
"TAVILY_API_KEY", "EXA_API_KEY", "SEARXNG_URL", "BRAVE_SEARCH_API_KEY"):
os.environ.pop(k, None)
assert check_web_api_key() is True
def test_typo_extract_backend_not_masked_by_parallel(self):
"""A typo'd per-capability backend is honored (so dispatch errors)
rather than silently falling through to keyless Parallel."""
from tools.web_tools import _get_extract_backend, check_web_api_key
with patch("tools.web_tools._load_web_config",
return_value={"extract_backend": "parrallel"}):
assert _get_extract_backend() == "parrallel" # not "parallel"
assert check_web_api_key() is False # unknown → unusable
def test_keyless_parallel_unusable_when_provider_disabled(self):
"""If the bundled web-parallel provider is disabled/unregistered, the
keyless free-MCP path must NOT report web as usable; otherwise setup is
skipped but web tools fail at runtime with no provider."""
from tools.web_tools import check_web_api_key
with patch("tools.web_tools._load_web_config", return_value={}), \
patch("tools.web_tools._parallel_provider_registered", return_value=False), \
patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
patch("tools.web_tools.check_firecrawl_api_key", return_value=False), \
patch("tools.web_tools._ddgs_package_importable", return_value=False), \
patch.dict(os.environ, {}, clear=False):
for var in (
"PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
"TAVILY_API_KEY", "EXA_API_KEY", "BRAVE_SEARCH_API_KEY", "SEARXNG_URL",
):
os.environ.pop(var, None)
assert check_web_api_key() is False
def test_extract_autodetect_skips_search_only_for_keyless_parallel(self):
"""A search-only env credential (SEARXNG_URL) must not shadow the keyless
Parallel free-MCP extract fallback: extract auto-detect skips search-only
backends, so _get_extract_backend resolves to parallel (which can fetch),
while search auto-detect still prefers the configured searxng."""
from tools.web_tools import _get_extract_backend, _get_search_backend
with patch("tools.web_tools._load_web_config", return_value={}), \
patch.dict(os.environ, {}, clear=False):
for var in (
"PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
"TAVILY_API_KEY", "EXA_API_KEY", "BRAVE_SEARCH_API_KEY",
):
os.environ.pop(var, None)
os.environ["SEARXNG_URL"] = "http://localhost:8080"
with patch("tools.web_tools._is_tool_gateway_ready", return_value=False):
assert _get_search_backend() == "searxng"
assert _get_extract_backend() == "parallel"
def test_configured_but_unavailable_backend_reports_unusable(self):
"""An explicitly configured backend with no creds (exa, no key) →
check_web_api_key False so diagnostics flag the misconfiguration
even though the tools stay registered."""
from tools.web_tools import check_web_api_key
with patch("tools.web_tools._load_web_config", return_value={"backend": "exa"}), \
patch.dict(os.environ, {}, clear=False):
os.environ.pop("EXA_API_KEY", None)
with patch("tools.web_tools._ddgs_package_importable", return_value=False):
assert check_web_api_key() is False
def test_both_keys_returns_true(self):
@ -756,18 +690,12 @@ class TestCheckWebApiKey:
assert refresh_calls == []
def test_web_tools_registered_even_when_configured_backend_unavailable(self):
# Registration is unconditional (web_tools_registered) so an explicitly
# configured but unavailable backend (exa without EXA_API_KEY) keeps the
# tools registered to surface exa's setup error at call time — while the
# readiness probe (check_web_api_key) honestly reports not-configured.
from tools.web_tools import web_tools_registered, check_web_api_key
assert web_tools_registered() is True
with patch("tools.web_tools._load_web_config", return_value={"backend": "exa"}), \
patch.dict(os.environ, {}, clear=False):
os.environ.pop("EXA_API_KEY", None)
assert web_tools_registered() is True
assert check_web_api_key() is False
def test_configured_backend_must_match_available_provider(self):
with patch("tools.web_tools._load_web_config", return_value={"backend": "parallel"}):
with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"):
with patch.dict(os.environ, {"FIRECRAWL_GATEWAY_URL": "http://127.0.0.1:3002"}, clear=False):
from tools.web_tools import check_web_api_key
assert check_web_api_key() is False
def test_configured_firecrawl_backend_accepts_managed_gateway(self):
with patch("tools.web_tools._load_web_config", return_value={"backend": "firecrawl"}):

View file

@ -90,7 +90,7 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
# ─── Web search backends ───────────────────────────────────────────────
"search.exa": ("exa-py==2.10.2",),
"search.firecrawl": ("firecrawl-py==4.17.0",),
"search.parallel": ("parallel-web==0.6.0",),
"search.parallel": ("parallel-web==0.4.2",),
# ─── TTS providers ─────────────────────────────────────────────────────
# Pinned to exact versions to match pyproject.toml's no-ranges policy

View file

@ -141,35 +141,15 @@ def _load_web_config() -> dict:
except (ImportError, Exception):
return {}
# Recognized web backend names (config values accepted in ``web.backend`` /
# ``web.search_backend`` / ``web.extract_backend``). Kept as a single source of
# truth for config validation across the selection helpers.
_KNOWN_WEB_BACKENDS = frozenset(
{"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}
)
# Backends that only service web_search (their provider's ``supports_extract()``
# is False). They are skipped during *extract* auto-detect so a search-only
# credential (e.g. SEARXNG_URL) does not shadow the keyless Parallel free-MCP
# fallback, which would otherwise leave web_extract broken on a no-key install.
_SEARCH_ONLY_BACKENDS = frozenset({"searxng", "brave-free", "ddgs", "xai"})
def _get_backend(capability: str = "search") -> str:
def _get_backend() -> str:
"""Determine which web backend to use (shared fallback).
Reads ``web.backend`` from config.yaml (set by ``hermes tools``).
Falls back to whichever API key is present for users who configured
keys manually without running setup.
``capability`` ("search" | "extract") only affects auto-detect: for
``extract`` we skip search-only backends (``_SEARCH_ONLY_BACKENDS``) so a
search-only credential never shadows the keyless Parallel free-MCP extract
fallback. An explicit ``web.backend`` value is honored as-is (explicit wins,
surfacing that backend's own search-only error rather than rerouting).
"""
configured = (_load_web_config().get("backend") or "").lower().strip()
if configured in _KNOWN_WEB_BACKENDS:
if configured in {"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}:
return configured
# Fallback for manual / legacy config — pick the highest-priority
@ -178,8 +158,7 @@ def _get_backend(capability: str = "search") -> str:
# pre-empted by a Nous OAuth token whose subscription tier may not
# actually grant web-search access (the gateway then fails at runtime
# with "no subscription" and the tool returns an error to the agent
# without falling back). Free-tier backends (searxng / brave-free /
# keyless parallel / ddgs) trail the keyed ones.
# without falling back). Free-tier backends trail the paid ones.
backend_candidates = (
("tavily", _has_env("TAVILY_API_KEY")),
("exa", _has_env("EXA_API_KEY")),
@ -188,24 +167,13 @@ def _get_backend(capability: str = "search") -> str:
("firecrawl", _is_tool_gateway_ready()),
("searxng", _has_env("SEARXNG_URL")),
("brave-free", _has_env("BRAVE_SEARCH_API_KEY")),
# Keyless Parallel free MCP — always available, the intended no-key
# default for both search and extract. Ahead of ddgs (search-only, so it
# can't service web_extract); ddgs stays reachable via web.backend=ddgs.
("parallel", True),
("ddgs", _ddgs_package_importable()),
)
for backend, available in backend_candidates:
if not available:
continue
# For extract, skip search-only backends so the keyless Parallel
# free-MCP fallback (which can fetch URLs) is reached instead.
if capability == "extract" and backend in _SEARCH_ONLY_BACKENDS:
continue
return backend
if available:
return backend
# Defensive terminal (the keyless ``parallel`` candidate above is always
# available, so this is effectively unreachable).
return "parallel"
return "firecrawl" # default (backward compat)
def _get_search_backend() -> str:
@ -236,19 +204,14 @@ def _get_extract_backend() -> str:
def _get_capability_backend(capability: str) -> str:
    """Select the backend for a single web capability.

    Reads ``web.{capability}_backend`` from the web config (normalised to
    lower case with surrounding whitespace stripped). If that value is set
    *and* ``_is_backend_available()`` accepts it, it is returned. In every
    other case (unset, or set but not available) selection falls through to
    the shared ``_get_backend()``.

    Args:
        capability: Prefix of the config key to read, giving
            ``"{capability}_backend"`` (the sibling helpers
            ``_get_search_backend`` / ``_get_extract_backend`` suggest
            ``"search"`` and ``"extract"``).

    Returns:
        The name of the backend to dispatch to.
    """
    cfg = _load_web_config()
    # ``or ""`` tolerates a key that is present but null in the config.
    specific = (cfg.get(f"{capability}_backend") or "").lower().strip()
    # One availability-gated check: an explicit but unavailable choice is not
    # returned; it defers to the shared selection below.
    if specific and _is_backend_available(specific):
        return specific
    # Called without arguments so it works whether or not ``_get_backend``
    # accepts an optional capability parameter.
    return _get_backend()
def _is_backend_available(backend: str) -> bool:
@ -256,8 +219,6 @@ def _is_backend_available(backend: str) -> bool:
if backend == "exa":
return _has_env("EXA_API_KEY")
if backend == "parallel":
# Credential probe: True only with a real key. The keyless free-MCP
# fallback is handled by _get_backend()'s terminal default, not here.
return _has_env("PARALLEL_API_KEY")
if backend == "firecrawl":
return check_firecrawl_api_key()
@ -810,17 +771,6 @@ def _ensure_web_plugins_loaded() -> None:
Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` exactly:
the underlying discovery call is idempotent and cheap on subsequent
invocations.
Triggering discovery is necessary but not *sufficient*: the sweep can
finish without registering the bundled web providers (its exception
swallowed below as a warning, a packaged layout where discovery ran before
the bundled tree was importable, or a stale empty-discovery cache). When
that happens the registry is empty and *both* web_search and web_extract
dead-end on "No web {search,extract} provider configured" even though the
keyless Parallel default is supposed to work with zero setup. So after
discovery we verify the keyless default landed and, if not, register the
bundled providers directly (see
:func:`_register_bundled_web_providers_directly`).
"""
try:
from hermes_cli.plugins import _ensure_plugins_discovered
@ -833,87 +783,6 @@ def _ensure_web_plugins_loaded() -> None:
# clue in normal logs about the real cause.
logger.warning("Web plugin discovery failed (non-fatal): %s", exc)
# Belt-and-suspenders: guarantee the keyless Parallel default (the
# documented zero-setup backend for both web_search and web_extract) is
# actually registered. The lookup is a cheap dict hit on the healthy path
# (discovery already registered it → no-op); only an empty registry pays
# for the direct-registration sweep.
try:
from agent.web_search_registry import get_provider
if get_provider("parallel") is None:
_register_bundled_web_providers_directly()
except Exception as exc: # noqa: BLE001
logger.debug("Bundled web provider fallback check failed: %s", exc)
def _register_bundled_web_providers_directly() -> None:
    """Register bundled ``plugins/web/<name>`` providers by importing them.

    Fallback for when the general plugin discovery sweep has not populated
    the web-search registry. Every sub-directory of the bundled ``web``
    plugin directory that carries a ``plugin.yaml`` / ``plugin.yml`` manifest
    is imported as ``plugins.web.<name>`` and, if the module exposes a
    callable ``register``, that function is invoked with a minimal context
    that forwards ``WebSearchProvider`` instances to
    ``agent.web_search_registry.register_provider``.

    Directories whose key (``web/<dir>``) or manifest-style name
    (``web-<dir-with-dashes>``) appears in ``_get_disabled_plugins()`` are
    skipped. Failures importing or registering one provider are logged at
    debug level and do not stop the remaining providers.
    """
    try:
        from hermes_cli.plugins import (
            _get_disabled_plugins,
            get_bundled_plugins_dir,
        )
    except Exception as exc:  # noqa: BLE001
        logger.debug("Bundled web provider fallback unavailable: %s", exc)
        return

    bundled_web_root = get_bundled_plugins_dir() / "web"
    if not bundled_web_root.is_dir():
        return

    disabled = _get_disabled_plugins()

    from agent.web_search_provider import WebSearchProvider
    from agent.web_search_registry import register_provider

    class _DirectRegistrationCtx:
        """Stand-in plugin context that only supports web-provider registration."""

        def register_web_search_provider(self, provider) -> None:
            # Objects that are not WebSearchProvider instances are ignored.
            if isinstance(provider, WebSearchProvider):
                register_provider(provider)

    ctx = _DirectRegistrationCtx()

    import importlib

    manifest_names = ("plugin.yaml", "plugin.yml")
    for plugin_dir in sorted(bundled_web_root.iterdir()):
        if not plugin_dir.is_dir():
            continue
        if not any((plugin_dir / manifest).exists() for manifest in manifest_names):
            continue

        dir_name = plugin_dir.name
        # An explicit disable may be written either as the plugin key
        # (``web/<dir>``) or as the manifest name (``web-<dir-with-dashes>``).
        disable_keys = (
            f"web/{dir_name}",
            f"web-{dir_name.replace('_', '-')}",
        )
        if any(key in disabled for key in disable_keys):
            continue

        try:
            module = importlib.import_module(f"plugins.web.{dir_name}")
            register_fn = getattr(module, "register", None)
            if callable(register_fn):
                register_fn(ctx)
        except Exception as exc:  # noqa: BLE001
            logger.debug(
                "Direct registration of bundled web provider '%s' failed: %s",
                dir_name, exc,
            )
def web_search_tool(query: str, limit: int = 5) -> str:
"""
@ -1103,19 +972,11 @@ async def web_extract_tool(
else:
safe_urls.append(url)
# Tracks the free-tier Parallel extract path (no key → web_fetch via the
# hosted Search MCP) so we can credit Parallel in the output/UI. Bound
# here so empty/all-blocked inputs (which skip dispatch) stay defined.
_free_parallel_extract = False
# Dispatch only safe URLs to the configured backend
if not safe_urls:
results = []
else:
backend = _get_extract_backend()
_free_parallel_extract = (
backend == "parallel" and not _has_env("PARALLEL_API_KEY")
)
# All seven providers (brave-free, ddgs, searxng, exa, parallel,
# tavily, firecrawl) now live as plugins. The dispatcher is a
@ -1289,14 +1150,6 @@ async def web_extract_tool(
for r in response.get("results", [])
]
trimmed_response = {"results": trimmed_results}
if _free_parallel_extract:
# Credit Parallel's free Search MCP (drives the "[Parallel]" UI tag
# + lets the model cite the source). Free tier only.
trimmed_response["provider"] = "parallel"
trimmed_response["attribution"] = (
"Extraction powered by the free Parallel Web Search MCP "
"(https://parallel.ai)."
)
if trimmed_response.get("results") == []:
result_json = tool_error("Content was inaccessible or not found")
@ -1328,61 +1181,16 @@ async def web_extract_tool(
return tool_error(error_msg)
def web_tools_registered() -> bool:
    """Report whether the web tools should be registered: unconditionally yes.

    This answers "should the tool be offered?", not "is a backend usable?".
    It takes no input and inspects no credentials or configuration. Use
    :func:`check_web_api_key` when the question is whether web access is
    actually configured.

    Returns:
        Always ``True``.
    """
    return True
def _parallel_provider_registered() -> bool:
    """Report whether a provider named ``"parallel"`` is in the web registry.

    Runs ``_ensure_web_plugins_loaded()`` first, then looks the provider up
    through ``agent.web_search_registry.get_provider``. A ``None`` lookup
    result, or any exception raised while importing the registry or
    performing the lookup, is reported as ``False``.
    """
    _ensure_web_plugins_loaded()
    try:
        from agent.web_search_registry import get_provider

        provider = get_provider("parallel")
    except Exception:  # noqa: BLE001
        return False
    return provider is not None
def _backend_usable(backend: str) -> bool:
    """Decide whether *backend* can service web calls.

    The one special case is ``"parallel"`` when ``_has_env("PARALLEL_API_KEY")``
    is false (the keyless path): it is usable only if the parallel provider is
    actually registered, as reported by ``_parallel_provider_registered()``.
    Every other combination, including ``"parallel"`` with a key, is delegated
    to ``_is_backend_available()``.

    Args:
        backend: Backend name to probe.

    Returns:
        ``True`` when the backend is considered usable.
    """
    keyless_parallel = backend == "parallel" and not _has_env("PARALLEL_API_KEY")
    if not keyless_parallel:
        return _is_backend_available(backend)
    # Without a key, "parallel" only works if its provider really got
    # registered; otherwise calls would fail at dispatch time.
    return _parallel_provider_registered()
# Convenience probe: is at least one usable web backend configured?
def check_web_api_key() -> bool:
    """Check whether the configured web backend is available.

    If ``web.backend`` names one of the known backends, only that backend is
    probed with ``_is_backend_available()``, so an explicit choice that is
    unavailable reports ``False`` even when another backend would work.
    When ``web.backend`` is unset or unrecognised, the result is ``True`` if
    any known backend is available.

    Returns:
        ``True`` when a web backend is available under the rules above.
    """
    # Single source for both the membership test and the fallback sweep; the
    # order is the order in which backends are probed.
    known_backends = (
        "exa", "parallel", "firecrawl", "tavily",
        "searxng", "brave-free", "ddgs", "xai",
    )
    # ``or ""`` (rather than a ``.get`` default) also covers a ``backend`` key
    # that is present but null, which would otherwise raise AttributeError on
    # ``.lower()``.
    configured = (_load_web_config().get("backend") or "").lower().strip()
    if configured in known_backends:
        return _is_backend_available(configured)
    return any(_is_backend_available(backend) for backend in known_backends)
def check_auxiliary_model() -> bool:
@ -1550,7 +1358,7 @@ registry.register(
toolset="web",
schema=WEB_SEARCH_SCHEMA,
handler=lambda args, **kw: web_search_tool(args.get("query", ""), limit=args.get("limit", 5)),
check_fn=web_tools_registered,
check_fn=check_web_api_key,
requires_env=_web_requires_env(),
emoji="🔍",
max_result_size_chars=100_000,
@ -1561,7 +1369,7 @@ registry.register(
schema=WEB_EXTRACT_SCHEMA,
handler=lambda args, **kw: web_extract_tool(
args.get("urls", [])[:5] if isinstance(args.get("urls"), list) else [], "markdown"),
check_fn=web_tools_registered,
check_fn=check_web_api_key,
requires_env=_web_requires_env(),
is_async=True,
emoji="📄",

8
uv.lock generated
View file

@ -1654,7 +1654,7 @@ requires-dist = [
{ name = "numpy", marker = "extra == 'voice'", specifier = "==2.4.3" },
{ name = "openai", specifier = "==2.24.0" },
{ name = "packaging", specifier = "==26.0" },
{ name = "parallel-web", marker = "extra == 'parallel-web'", specifier = "==0.6.0" },
{ name = "parallel-web", marker = "extra == 'parallel-web'", specifier = "==0.4.2" },
{ name = "pathspec", specifier = "==1.1.1" },
{ name = "pillow", specifier = "==12.2.0" },
{ name = "prompt-toolkit", specifier = "==3.0.52" },
@ -2690,7 +2690,7 @@ wheels = [
[[package]]
name = "parallel-web"
version = "0.6.0"
version = "0.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@ -2700,9 +2700,9 @@ dependencies = [
{ name = "sniffio" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7f/81/101c961fe6665212df01fb39a70ebb379dc33529c7bc9210675c0f525139/parallel_web-0.6.0.tar.gz", hash = "sha256:f8aecd3f1958090090c4516881cefea4f55c40948ba3bb99217ca9a6d4263225", size = 173149, upload-time = "2026-05-06T19:13:09.782Z" }
sdist = { url = "https://files.pythonhosted.org/packages/24/50/fb9b28a679e01682006b5259abff96de3d16e114e9447a7793fec31715de/parallel_web-0.4.2.tar.gz", hash = "sha256:599b5a8f387dc35c7dc8c81e372eadf6958a40acacea58bf170dfc663c003da7", size = 140026, upload-time = "2026-03-09T22:24:35.448Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a2/7c/7e8b63a0e90efaf567a818fca86c6ad3a85711f8995d2657b51b0cae2351/parallel_web-0.6.0-py3-none-any.whl", hash = "sha256:dc5342ef7262bd2e9f85eb7eace32833bd3d7e3af0bf5fbd780d1ea8c8d9ceb0", size = 199217, upload-time = "2026-05-06T19:13:08.316Z" },
{ url = "https://files.pythonhosted.org/packages/a0/3e/2218fa29637781b8e7ac35a928108ff2614ddd40879389d3af2caa725af5/parallel_web-0.4.2-py3-none-any.whl", hash = "sha256:aa3a4a9aecc08972c5ce9303271d4917903373dff4dd277d9a3e30f9cff53346", size = 144012, upload-time = "2026-03-09T22:24:33.979Z" },
]
[[package]]