revert(web): remove keyless Parallel search fallback (#46350)

Remove the free Parallel Search MCP path and restore the keyed Parallel backend behavior from before it was introduced. Also drops the keyless fallback registration/display labeling tests and returns the Parallel SDK pin to the prior version.
2026-06-15 09:21:36 +00:00 · 2026-06-14 16:47:57 -07:00 · 2026-06-14 16:47:57 -07:00 · f3fe99863d
commit f3fe99863d
parent a829e04d62
16 changed files with 98 additions and 1398 deletions
--- a/agent/display.py
+++ b/agent/display.py
@ -858,20 +858,6 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
    return False, ""


-def _used_free_parallel(result: str | None) -> bool:
-    """True when a web result came from Parallel's free Search MCP.
-
-    Only the keyless Parallel path tags its result with ``provider="parallel"``;
-    the paid REST path and every other provider omit it. Used to label the tool
-    line "Parallel search" / "Parallel fetch" exactly when the free MCP served
-    the call.
-    """
-    if not isinstance(result, str) or '"provider"' not in result:
-        return False
-    data = safe_json_loads(result)
-    return isinstance(data, dict) and str(data.get("provider", "")).lower() == "parallel"
-
-
 def get_cute_tool_message(
    tool_name: str, args: dict, duration: float, result: str | None = None,
 ) -> str:
@ -909,17 +895,15 @@ def get_cute_tool_message(
        return f"{line}{failure_suffix}"

    if tool_name == "web_search":
-        verb = "Parallel search" if _used_free_parallel(result) else "search"
-        return _wrap(f"┊ 🔍 {verb:<9} {_trunc(args.get('query', ''), 42)}  {dur}")
+        return _wrap(f"┊ 🔍 search    {_trunc(args.get('query', ''), 42)}  {dur}")
    if tool_name == "web_extract":
-        verb = "Parallel fetch" if _used_free_parallel(result) else "fetch"
        urls = args.get("urls", [])
        if urls:
            url = urls[0] if isinstance(urls, list) else str(urls)
            domain = url.replace("https://", "").replace("http://", "").split("/")[0]
            extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
-            return _wrap(f"┊ 📄 {verb:<9} {_trunc(domain, 35)}{extra}  {dur}")
-        return _wrap(f"┊ 📄 {verb:<9} pages  {dur}")
+            return _wrap(f"┊ 📄 fetch     {_trunc(domain, 35)}{extra}  {dur}")
+        return _wrap(f"┊ 📄 fetch     pages  {dur}")
    if tool_name == "terminal":
        return _wrap(f"┊ 💻 $         {_trunc(args.get('command', ''), 42)}  {dur}")
    if tool_name == "process":
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@ -2182,13 +2182,8 @@ def _toolset_needs_configuration_prompt(
        tts_cfg = config.get("tts", {})
        return not isinstance(tts_cfg, dict) or "provider" not in tts_cfg
    if ts_key == "web":
-        # Web works out of the box via Parallel's free Search MCP (no key), so
-        # don't force setup just because ``web.backend`` is unset — only prompt
-        # when web isn't actually usable (e.g. an explicit backend configured
-        # without its credentials). Lazy import: web_tools is heavy and most
-        # tools_config callers don't need it.
-        from tools.web_tools import check_web_api_key
-        return not check_web_api_key()
+        web_cfg = config.get("web", {})
+        return not isinstance(web_cfg, dict) or "backend" not in web_cfg
    if ts_key == "browser":
        browser_cfg = config.get("browser", {})
        return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg
--- a/plugins/web/parallel/provider.py
+++ b/plugins/web/parallel/provider.py
@ -1,20 +1,14 @@
 """Parallel.ai web search + content extraction — plugin form.

-Subclasses :class:`agent.web_search_provider.WebSearchProvider`.
+Subclasses :class:`agent.web_search_provider.WebSearchProvider`. Uses two
+distinct Parallel SDK clients:

-Search runs on one of two transports, picked by credential:
+- ``Parallel`` (sync)        — for :meth:`search`
+- ``AsyncParallel`` (async)  — for :meth:`extract`

- **No key →** the free hosted Search MCP at ``https://search.parallel.ai/mcp``
-  (anonymous Streamable-HTTP JSON-RPC). This makes ``web_search`` work out of
-  the box with zero setup, which is why ``parallel`` is the keyless default
-  backend in :func:`tools.web_tools._get_backend`.
- **``PARALLEL_API_KEY`` →** the ``parallel`` SDK's v1 ``search`` / ``extract``
-  REST endpoints (objective-tuned, mode-selectable, higher rate limits).
-
-Extract mirrors search: keyed uses the async SDK (``AsyncParallel``) v1
-``extract``; keyless uses the free MCP's ``web_fetch``. :meth:`extract` is
-declared ``async def`` and the dispatcher in
-:func:`tools.web_tools.web_extract_tool` detects coroutines via
+This is the first plugin to exercise the **async-extract** code path in
+the ABC: :meth:`extract` is declared ``async def``, and the dispatcher
+in :func:`tools.web_tools.web_extract_tool` detects coroutines via
 :func:`inspect.iscoroutinefunction` and awaits.

 Config keys this provider responds to::
@ -23,66 +17,25 @@ Config keys this provider responds to::
      search_backend: "parallel"      # explicit per-capability
      extract_backend: "parallel"     # explicit per-capability
      backend: "parallel"             # shared fallback
-      # Optional: search mode (default "advanced"; also "basic")
-      # via the PARALLEL_SEARCH_MODE env var. REST path only.
+      # Optional: search mode (default "agentic"; also "fast" or "one-shot")
+      # via the PARALLEL_SEARCH_MODE env var.

 Env vars::

-    PARALLEL_API_KEY=...             # https://parallel.ai (optional — unlocks
-                                     # the v1 REST Search API; without it,
-                                     # search and extract use the free MCP)
-    PARALLEL_SEARCH_MODE=advanced    # optional: basic|advanced (legacy
-                                     # fast/one-shot map to basic, agentic to
-                                     # advanced). REST path only.
+    PARALLEL_API_KEY=...             # https://parallel.ai (required)
+    PARALLEL_SEARCH_MODE=agentic     # optional: agentic|fast|one-shot
 """

 from __future__ import annotations

-import asyncio
-import json
 import logging
 import os
-import uuid
 from typing import Any, Dict, List

-import httpx
-
 from agent.web_search_provider import WebSearchProvider

 logger = logging.getLogger(__name__)

-# Free hosted Search MCP — anonymous-friendly, used when no PARALLEL_API_KEY is
-# configured. Docs: https://docs.parallel.ai/integrations/mcp/search-mcp
-_MCP_SEARCH_URL = "https://search.parallel.ai/mcp"
-_MCP_PROTOCOL_VERSION = "2025-06-18"
-# Deliberately generic client identity. Project policy (see the telemetry PR
-# policy in AGENTS.md) forbids third-party usage attribution without an
-# explicit user opt-in, so neither clientInfo nor the User-Agent names
-# hermes. MCP requires *a* clientInfo; a neutral one satisfies the spec
-# without attributing traffic.
-_MCP_CLIENT_NAME = "mcp-web-client"
-_MCP_CLIENT_VERSION = "1.0.0"
-_MCP_USER_AGENT = f"{_MCP_CLIENT_NAME}/{_MCP_CLIENT_VERSION}"
-_MCP_TIMEOUT_SECONDS = 30.0
-
-# Free-tier attribution. The hosted Search MCP is free to use; surfacing this
-# on keyless results credits Parallel and matches the free-tier terms
-# (https://parallel.ai/customer-terms).
-_FREE_MCP_ATTRIBUTION = (
-    "Search powered by the free Parallel Web Search MCP (https://parallel.ai)."
-)
-
-
-def _new_session_id() -> str:
-    """Mint a fresh Parallel ``session_id`` for a single tool call.
-
-    Per-call rather than process-global: one process serves many unrelated
-    chats in the gateway/batch runners, and a shared id would pool their
-    searches into one Parallel session. The prefix is deliberately generic
-    (no hermes attribution — telemetry policy).
-    """
-    return f"{_MCP_CLIENT_NAME}-{uuid.uuid4().hex}"
-
 # Module-level note: the canonical cache slots ``_parallel_client`` and
 # ``_async_parallel_client`` live on :mod:`tools.web_tools` so tests that do
 # ``tools.web_tools._parallel_client = None`` between cases see fresh state.
@ -180,319 +133,11 @@ _get_async_parallel_client = _get_async_client


 def _resolve_search_mode() -> str:
-    """Return the validated v1 search mode (default "advanced").
-
-    V1 collapses the three Beta modes into two. We accept the v1 values
-    directly and map the legacy Beta values for back-compat with anyone who
-    still sets ``PARALLEL_SEARCH_MODE=fast|one-shot|agentic``:
-
-    - ``fast`` / ``one-shot`` → ``basic``  (lower latency)
-    - ``agentic``             → ``advanced`` (higher quality, the v1 default)
-    """
-    mode = os.getenv("PARALLEL_SEARCH_MODE", "advanced").lower().strip()
-    if mode == "basic" or mode in {"fast", "one-shot"}:
-        return "basic"
-    # advanced, legacy "agentic", and anything unrecognized → the v1 default.
-    return "advanced"
-
-
-# ---------------------------------------------------------------------------
-# Free Search MCP transport (keyless path)
-# ---------------------------------------------------------------------------
-#
-# A small hand-rolled Streamable-HTTP JSON-RPC client for the hosted Search
-# MCP, rather than the full MCP-client subsystem: we only call two tools
-# (``web_search`` / ``web_fetch``), so keeping it inline lets web_search and
-# web_extract stay ordinary tools with the MCP endpoint as just their wire
-# protocol.
-
-
-def _mcp_headers(
-    session_id: str | None,
-    api_key: str | None,
-    protocol_version: str | None = None,
-) -> Dict[str, str]:
-    """Headers for an MCP request.
-
-    A Bearer token is attached only when we actually hold a key — the free
-    endpoint is anonymous, and sending an empty/garbage token would make it
-    401 instead of serving the anonymous tier. After ``initialize`` the
-    Streamable-HTTP spec expects the negotiated ``MCP-Protocol-Version`` on
-    every follow-up request, so we echo it once known.
-    """
-    headers = {
-        "Content-Type": "application/json",
-        "Accept": "application/json, text/event-stream",
-        "User-Agent": _MCP_USER_AGENT,
-    }
-    if session_id:
-        headers["Mcp-Session-Id"] = session_id
-    if protocol_version:
-        headers["MCP-Protocol-Version"] = protocol_version
-    if api_key:
-        headers["Authorization"] = f"Bearer {api_key}"
-    return headers
-
-
-def _iter_mcp_messages(text: str):
-    """Yield JSON-RPC message dicts from a plain-JSON or SSE response body.
-
-    Handles ``application/json`` (a single object) and ``text/event-stream``
-    (SSE: events separated by blank lines; an event's one-or-more ``data:``
-    lines concatenate into a single JSON payload). Unparseable chunks and
-    non-``data`` SSE fields (``event:``/``id:``/comments) are skipped.
-    """
-    def _emit(payload):
-        # Streamable HTTP allows batching responses/notifications into a JSON
-        # array — flatten so callers always see individual message dicts.
-        if isinstance(payload, list):
-            yield from payload
-        elif payload is not None:
-            yield payload
-
-    body = (text or "").strip()
-    if not body:
-        return
-    if body.startswith("{") or body.startswith("["):
-        try:
-            parsed = json.loads(body)
-        except json.JSONDecodeError:
-            return
-        yield from _emit(parsed)
-        return
-
-    data_lines: List[str] = []
-
-    def _flush():
-        if not data_lines:
-            return None
-        try:
-            return json.loads("\n".join(data_lines))
-        except json.JSONDecodeError:
-            return None
-
-    for raw in body.split("\n"):
-        line = raw.rstrip("\r")
-        if line.startswith("data:"):
-            data_lines.append(line[len("data:"):].lstrip())
-        elif line.strip() == "":  # event boundary
-            yield from _emit(_flush())
-            data_lines = []
-    yield from _emit(_flush())
-
-
-def _mcp_response_envelope(text: str, request_id: str) -> Dict[str, Any]:
-    """Select the JSON-RPC response for *request_id* from an MCP response body.
-
-    Streamable-HTTP servers may emit progress/log notifications before the
-    final result, so we scan the whole stream and return the result/error
-    message whose ``id`` matches our request. Falls back to the last
-    result/error-bearing message if no id matches; ``{}`` if none is present.
-    """
-    fallback: Dict[str, Any] = {}
-    for msg in _iter_mcp_messages(text):
-        if not isinstance(msg, dict) or not ("result" in msg or "error" in msg):
-            continue
-        if msg.get("id") == request_id:
-            return msg
-        fallback = msg
-    return fallback
-
-
-def _mcp_payload(envelope: Dict[str, Any]) -> Dict[str, Any]:
-    """Extract the tool result payload from a ``tools/call`` envelope.
-
-    Prefers ``structuredContent`` (authoritative machine-readable form);
-    otherwise scans text blocks for the first JSON-parseable one. Raises on a
-    JSON-RPC error or a tool-level ``isError``.
-    """
-    if "error" in envelope:
-        raise RuntimeError(f"Parallel MCP error: {str(envelope['error'])[:500]}")
-    result = envelope.get("result") or {}
-    if result.get("isError"):
-        raise RuntimeError(f"Parallel MCP tool error: {str(result)[:500]}")
-
-    structured = result.get("structuredContent")
-    if isinstance(structured, dict):
-        return structured
-
-    for block in result.get("content", []) or []:
-        if isinstance(block, dict) and block.get("type") == "text":
-            text = str(block.get("text") or "")
-            if not text:
-                continue
-            try:
-                return json.loads(text)
-            except json.JSONDecodeError:
-                continue
-    raise RuntimeError(
-        f"Parallel MCP returned no parseable content: {str(result)[:500]}"
-    )
-
-
-def _mcp_call(
-    tool_name: str, arguments: Dict[str, Any], api_key: str | None
-) -> Dict[str, Any]:
-    """Run the MCP handshake then a single ``tools/call`` and return its payload.
-
-    initialize → (capture ``Mcp-Session-Id``) → notifications/initialized →
-    tools/call ``tool_name``. Returns the parsed tool payload dict (see
-    :func:`_mcp_payload`). A Bearer token is attached only when *api_key* is set.
-    """
-    with httpx.Client(timeout=_MCP_TIMEOUT_SECONDS) as client:
-        # 1. initialize — capture the server-assigned MCP session id.
-        init_id = str(uuid.uuid4())
-        init = client.post(
-            _MCP_SEARCH_URL,
-            headers=_mcp_headers(None, api_key),
-            json={
-                "jsonrpc": "2.0",
-                "id": init_id,
-                "method": "initialize",
-                "params": {
-                    "protocolVersion": _MCP_PROTOCOL_VERSION,
-                    "capabilities": {},
-                    "clientInfo": {
-                        "name": _MCP_CLIENT_NAME,
-                        "version": _MCP_CLIENT_VERSION,
-                    },
-                },
-            },
-        )
-        init.raise_for_status()
-        # Only echo a session id the server actually issued. Stateless
-        # Streamable-HTTP servers may omit it; inventing one and sending it on
-        # follow-up requests can get those requests rejected (the server never
-        # created that session). When absent, the Mcp-Session-Id header is simply
-        # omitted (see _mcp_headers). This is separate from the tool-arg
-        # ``session_id`` below, which is a client-minted rate-limit/grouping id.
-        mcp_session_id = init.headers.get("mcp-session-id")
-        init_env = _mcp_response_envelope(init.text, init_id)
-        # Echo the negotiated protocol version on every post-init request, per
-        # the Streamable-HTTP spec (servers may enforce it).
-        negotiated_version = (
-            (init_env.get("result") or {}).get("protocolVersion")
-            or _MCP_PROTOCOL_VERSION
-        )
-
-        # 2. notifications/initialized — required handshake ack.
-        client.post(
-            _MCP_SEARCH_URL,
-            headers=_mcp_headers(mcp_session_id, api_key, negotiated_version),
-            json={"jsonrpc": "2.0", "method": "notifications/initialized"},
-        )
-
-        # 3. tools/call.
-        call_id = str(uuid.uuid4())
-        call = client.post(
-            _MCP_SEARCH_URL,
-            headers=_mcp_headers(mcp_session_id, api_key, negotiated_version),
-            json={
-                "jsonrpc": "2.0",
-                "id": call_id,
-                "method": "tools/call",
-                "params": {"name": tool_name, "arguments": arguments},
-            },
-        )
-        call.raise_for_status()
-        return _mcp_payload(_mcp_response_envelope(call.text, call_id))
-
-
-def _mcp_web_search(query: str, limit: int, api_key: str | None) -> Dict[str, Any]:
-    """Run a ``web_search`` tool call against the hosted Search MCP.
-
-    Returns the standard provider search shape
-    (``{"success": True, "data": {"web": [...]}}``). The MCP serves a fixed
-    result count, so ``limit`` is applied client-side. The MCP requires
-    ``objective`` (REST treats it as optional), so we mirror the query.
-    """
-    payload = _mcp_call(
-        "web_search",
-        {
-            "objective": query,
-            "search_queries": [query],
-            "session_id": _new_session_id(),
-        },
-        api_key,
-    )
-
-    web_results: List[Dict[str, Any]] = []
-    for i, result in enumerate((payload.get("results") or [])[: max(limit, 1)]):
-        if not isinstance(result, dict):
-            continue
-        excerpts = result.get("excerpts") or []
-        web_results.append(
-            {
-                "url": result.get("url") or "",
-                "title": result.get("title") or "",
-                "description": " ".join(excerpts) if excerpts else "",
-                "position": i + 1,
-            }
-        )
-
-    # Credit the free tier (anonymous path only — keyed search uses REST and
-    # carries no attribution).
-    return {
-        "success": True,
-        "data": {"web": web_results},
-        "provider": "parallel",
-        "attribution": _FREE_MCP_ATTRIBUTION,
-    }
-
-
-def _mcp_web_fetch(urls: List[str], api_key: str | None) -> List[Dict[str, Any]]:
-    """Run a ``web_fetch`` tool call against the hosted Search MCP.
-
-    Returns the per-URL extract shape that
-    :func:`tools.web_tools.web_extract_tool` expects — exactly one row per input
-    URL, in request order (including duplicates). We pass ``full_content=True``
-    so the page body comes back as markdown (matching the keyed SDK path and
-    what extract callers/summarizers expect), falling back to excerpts only when
-    full content is absent. Any input the MCP didn't return is emitted as a
-    per-URL error row.
-    """
-    payload = _mcp_call(
-        "web_fetch",
-        {"urls": list(urls), "full_content": True, "session_id": _new_session_id()},
-        api_key,
-    )
-
-    # Index the response by URL, then emit one row per *input* URL in order so
-    # duplicates and positional alignment with the request list are preserved.
-    by_url: Dict[str, Dict[str, Any]] = {}
-    for item in payload.get("results") or []:
-        if isinstance(item, dict) and item.get("url"):
-            by_url.setdefault(item["url"], item)
-
-    results: List[Dict[str, Any]] = []
-    for url in urls:
-        item = by_url.get(url)
-        if item is None:
-            results.append(
-                {
-                    "url": url,
-                    "title": "",
-                    "content": "",
-                    "error": "extraction failed (no content returned)",
-                    "metadata": {"sourceURL": url},
-                }
-            )
-            continue
-        title = item.get("title") or ""
-        # Prefer the full page body; fall back to joined excerpts (mirrors the
-        # keyed SDK extract path).
-        content = item.get("full_content") or "\n\n".join(item.get("excerpts") or [])
-        results.append(
-            {
-                "url": url,
-                "title": title,
-                "content": content,
-                "raw_content": content,
-                "metadata": {"sourceURL": url, "title": title},
-            }
-        )
-
-    return results
+    """Return the validated PARALLEL_SEARCH_MODE value (default "agentic")."""
+    mode = os.getenv("PARALLEL_SEARCH_MODE", "agentic").lower().strip()
+    if mode not in {"fast", "one-shot", "agentic"}:
+        mode = "agentic"
+    return mode


 class ParallelWebSearchProvider(WebSearchProvider):
@ -507,14 +152,7 @@ class ParallelWebSearchProvider(WebSearchProvider):
        return "Parallel"

    def is_available(self) -> bool:
-        """Return True when ``PARALLEL_API_KEY`` is set.
-
-        Deliberately key-based: this gates the registry's active-provider walk
-        and the ``hermes tools`` picker (auto-selecting Parallel for a user who
-        hasn't named it), so it must not claim availability on the keyless path.
-        The keyless free-MCP path is reached independently via
-        :func:`tools.web_tools._get_backend`'s ``parallel`` terminal default.
-        """
+        """Return True when ``PARALLEL_API_KEY`` is set to a non-empty value."""
        return bool(os.getenv("PARALLEL_API_KEY", "").strip())

    def supports_search(self) -> bool:
@ -526,11 +164,9 @@ class ParallelWebSearchProvider(WebSearchProvider):
    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Execute a Parallel search (sync).

-        With ``PARALLEL_API_KEY`` set, uses the v1 ``search`` REST endpoint with
-        the configured mode (``PARALLEL_SEARCH_MODE`` env var, default
-        "advanced"; limit requested via advanced_settings.max_results, capped at
-        20). Without a key, falls back to the free hosted Search MCP so search
-        still works with zero setup.
+        Uses the ``beta.search`` endpoint with the configured mode
+        (``PARALLEL_SEARCH_MODE`` env var, default "agentic"). Limit is
+        capped at 20 server-side.
        """
        try:
            from tools.interrupt import is_interrupted
@ -538,31 +174,19 @@ class ParallelWebSearchProvider(WebSearchProvider):
            if is_interrupted():
                return {"success": False, "error": "Interrupted"}

-            api_key = os.getenv("PARALLEL_API_KEY", "").strip()
-            if not api_key:
-                logger.info(
-                    "Parallel search (free MCP): '%s' (limit=%d)", query, limit
-                )
-                return _mcp_web_search(query, limit, api_key=None)
-
            mode = _resolve_search_mode()
            logger.info(
-                "Parallel search (v1 REST): '%s' (mode=%s, limit=%d)",
-                query, mode, limit,
+                "Parallel search: '%s' (mode=%s, limit=%d)", query, mode, limit
            )
-            # v1 Search API. Request the caller's limit via max_results (capped
-            # at 20) so we don't rely on the API default — the slice below can
-            # only trim, not ask for more.
-            response = _get_sync_client().search(
+            response = _get_sync_client().beta.search(
                search_queries=[query],
                objective=query,
                mode=mode,
-                session_id=_new_session_id(),
-                advanced_settings={"max_results": min(max(limit, 1), 20)},
+                max_results=min(limit, 20),
            )

            web_results = []
-            for i, result in enumerate((response.results or [])[: max(limit, 1)]):
+            for i, result in enumerate(response.results or []):
                excerpts = result.excerpts or []
                web_results.append(
                    {
@ -573,8 +197,6 @@ class ParallelWebSearchProvider(WebSearchProvider):
                    }
                )

-            # Paid/REST path: no attribution and no "[Parallel]" label — the
-            # branding is specifically for the free Search MCP tier.
            return {"success": True, "data": {"web": web_results}}
        except ValueError as exc:
            return {"success": False, "error": str(exc)}
@ -590,12 +212,7 @@ class ParallelWebSearchProvider(WebSearchProvider):
    async def extract(
        self, urls: List[str], **kwargs: Any
    ) -> List[Dict[str, Any]]:
-        """Extract content from one or more URLs.
-
-        With ``PARALLEL_API_KEY`` set, uses the async SDK's v1 ``extract`` for
-        full page content. Without a key, falls back to the free hosted Search
-        MCP's ``web_fetch`` tool so extraction works with zero setup, mirroring
-        the keyless search path.
+        """Extract content from one or more URLs via the async SDK.

        Returns the legacy list-of-results shape that
        :func:`tools.web_tools.web_extract_tool` expects: one entry per
@ -610,21 +227,10 @@ class ParallelWebSearchProvider(WebSearchProvider):
                    {"url": u, "error": "Interrupted", "title": ""} for u in urls
                ]

-            api_key = os.getenv("PARALLEL_API_KEY", "").strip()
-            if not api_key:
-                logger.info(
-                    "Parallel extract (free MCP web_fetch): %d URL(s)", len(urls)
-                )
-                # _mcp_web_fetch is sync httpx; run off the event loop.
-                return await asyncio.to_thread(_mcp_web_fetch, list(urls), None)
-
-            logger.info("Parallel extract (v1 REST): %d URL(s)", len(urls))
-            # v1 Extract API (client.extract, /v1/extract); full_content is set
-            # via advanced_settings.
-            response = await _get_async_client().extract(
+            logger.info("Parallel extract: %d URL(s)", len(urls))
+            response = await _get_async_client().beta.extract(
                urls=urls,
-                advanced_settings={"full_content": True},
-                session_id=_new_session_id(),
+                full_content=True,
            )

            results: List[Dict[str, Any]] = []
@ -645,20 +251,13 @@ class ParallelWebSearchProvider(WebSearchProvider):
                )

            for error in response.errors or []:
-                err_url = getattr(error, "url", "") or ""
-                err_msg = (
-                    getattr(error, "message", None)
-                    or getattr(error, "content", None)
-                    or getattr(error, "error_type", None)
-                    or "extraction failed"
-                )
                results.append(
                    {
-                        "url": err_url,
+                        "url": error.url or "",
                        "title": "",
                        "content": "",
-                        "error": err_msg,
-                        "metadata": {"sourceURL": err_url},
+                        "error": error.content or error.error_type or "extraction failed",
+                        "metadata": {"sourceURL": error.url or ""},
                    }
                )

@ -680,16 +279,12 @@ class ParallelWebSearchProvider(WebSearchProvider):
    def get_setup_schema(self) -> Dict[str, Any]:
        return {
            "name": "Parallel",
-            "badge": "free",
-            "tag": (
-                "Free web search + extraction via Parallel's hosted Search MCP "
-                "— no key needed. Add PARALLEL_API_KEY for the v1 REST Search "
-                "API (richer modes, higher limits)."
-            ),
+            "badge": "paid",
+            "tag": "Objective-tuned search + parallel page extraction.",
            "env_vars": [
                {
                    "key": "PARALLEL_API_KEY",
-                    "prompt": "Parallel API key (optional — unlocks the v1 REST Search API)",
+                    "prompt": "Parallel API key",
                    "url": "https://parallel.ai",
                },
            ],
--- a/pyproject.toml
+++ b/pyproject.toml
@ -123,7 +123,7 @@ anthropic = ["anthropic==0.87.0"]  # CVE-2026-34450, CVE-2026-34452
 # search provider (configured via `hermes tools` or config.yaml).
 exa = ["exa-py==2.10.2"]
 firecrawl = ["firecrawl-py==4.17.0"]
-parallel-web = ["parallel-web==0.6.0"]
+parallel-web = ["parallel-web==0.4.2"]
 # Image generation backends
 fal = ["fal-client==0.13.1"]
 # Edge TTS — default TTS provider but still optional (users can pick
--- a/tests/agent/test_display.py
+++ b/tests/agent/test_display.py
@ -12,7 +12,6 @@ from agent.display import (
    set_tool_preview_max_len,
    _render_inline_unified_diff,
    _summarize_rendered_diff_sections,
-    _used_free_parallel,
    render_edit_diff_with_delta,
 )

@ -172,46 +171,6 @@ class TestCuteToolMessagePreviewLength:
        assert "[error]" not in line


-class TestWebProviderLabel:
-    """The free-path "Parallel search"/"Parallel fetch" verb labeling."""
-
-    def test_free_search_verb_is_parallel(self):
-        result = json.dumps({"success": True, "data": {"web": []}, "provider": "parallel"})
-        line = get_cute_tool_message("web_search", {"query": "hello"}, 0.1, result=result)
-        assert "Parallel search" in line
-        assert "hello" in line
-
-    def test_paid_search_verb_is_plain(self):
-        result = json.dumps({"success": True, "data": {"web": [{"url": "u"}]}})
-        line = get_cute_tool_message("web_search", {"query": "hi"}, 0.1, result=result)
-        assert "Parallel" not in line
-        assert "search" in line
-
-    def test_missing_result_verb_is_plain(self):
-        line = get_cute_tool_message("web_search", {"query": "hello"}, 0.1)
-        assert "Parallel" not in line
-        assert "search" in line
-
-    def test_helper_is_parallel_free_specific(self):
-        # Only Parallel's free MCP path marks results; nothing else does.
-        assert _used_free_parallel(json.dumps({"provider": "parallel"})) is True
-        assert _used_free_parallel(json.dumps({"provider": "exa"})) is False
-        assert _used_free_parallel(json.dumps({"provider": "firecrawl"})) is False
-        assert _used_free_parallel(json.dumps({"success": True, "data": {}})) is False
-        assert _used_free_parallel('not json') is False
-        assert _used_free_parallel(None) is False
-
-    def test_free_extract_verb_is_parallel(self):
-        result = json.dumps({"results": [{"url": "u", "content": "x"}], "provider": "parallel"})
-        line = get_cute_tool_message("web_extract", {"urls": ["https://a.test"]}, 0.1, result=result)
-        assert "Parallel fetch" in line
-
-    def test_paid_extract_verb_is_plain(self):
-        result = json.dumps({"results": [{"url": "u", "content": "x"}]})
-        line = get_cute_tool_message("web_extract", {"urls": ["https://a.test"]}, 0.1, result=result)
-        assert "Parallel" not in line
-
-
 class TestEditDiffPreview:
    def test_extract_edit_diff_for_patch(self):
        diff = extract_edit_diff("patch", '{"success": true, "diff": "--- a/x\\n+++ b/x\\n"}')
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@ -975,19 +975,6 @@ def test_toolset_has_keys_treats_no_key_providers_as_configured():
    assert _toolset_has_keys("computer_use", config) is True


-def test_web_no_prompt_when_usable_keyless():
-    """Fresh install: web works via the free Parallel MCP, so enabling the web
-    toolset should not force provider setup."""
-    with patch("tools.web_tools.check_web_api_key", return_value=True):
-        assert _toolset_needs_configuration_prompt("web", {}) is False
-
-
-def test_web_no_prompt_when_extract_backend_is_extract_capable():
-    with patch("tools.web_tools.check_web_api_key", return_value=True):
-        cfg = {"web": {"extract_backend": "parallel"}}
-        assert _toolset_needs_configuration_prompt("web", cfg) is False
-
-
 def test_computer_use_needs_configuration_when_cua_driver_post_setup_pending():
    """No-key providers can still need setup when their post_setup is unsatisfied.

--- a/tests/plugins/web/test_parallel_keyless_mcp.py
+++ b/tests/plugins/web/test_parallel_keyless_mcp.py
@ -1,383 +0,0 @@
-"""Keyless Parallel search via the free hosted Search MCP.
-
-Covers the transport added in ``plugins/web/parallel/provider.py`` that lets
-``web_search`` work with no ``PARALLEL_API_KEY``:
-
- ``_mcp_headers``  — Bearer attached only when a key is held
- ``_decode_mcp_envelope`` — plain-JSON and SSE (``data:``) response bodies
- ``_mcp_payload`` — structuredContent preferred, text-block JSON fallback, errors
- ``_mcp_web_search`` — full handshake (mocked transport) → standard search shape
- ``ParallelWebSearchProvider.search`` — keyless path routes to the MCP
-"""
-
-from __future__ import annotations
-
-import asyncio
-import json
-from unittest.mock import patch
-
-import pytest
-
-import plugins.web.parallel.provider as pp
-
-
-# ─── _mcp_headers ──────────────────────────────────────────────────────────
-
-class TestMcpHeaders:
-    def test_anonymous_has_no_authorization(self):
-        h = pp._mcp_headers(session_id=None, api_key=None)
-        assert "Authorization" not in h
-        assert h["Accept"] == "application/json, text/event-stream"
-        assert "Mcp-Session-Id" not in h
-
-    def test_user_agent_is_generic_not_hermes(self):
-        # Telemetry policy: no third-party usage attribution without opt-in.
-        # The UA must be set (not python-httpx default) but must not name
-        # hermes, on both the anonymous and keyed paths.
-        for ua in (
-            pp._mcp_headers(session_id=None, api_key=None)["User-Agent"],
-            pp._mcp_headers(session_id="sid", api_key="pk-live")["User-Agent"],
-        ):
-            assert ua == f"{pp._MCP_CLIENT_NAME}/{pp._MCP_CLIENT_VERSION}"
-            assert "hermes" not in ua.lower()
-
-    def test_session_id_and_bearer_when_present(self):
-        h = pp._mcp_headers(session_id="sid-123", api_key="pk-live")
-        assert h["Mcp-Session-Id"] == "sid-123"
-        assert h["Authorization"] == "Bearer pk-live"
-
-
-# ─── SSE / JSON-RPC parsing ──────────────────────────────────────────────────
-
-class TestMcpResponseParsing:
-    def test_plain_json_matched_by_id(self):
-        body = '{"jsonrpc":"2.0","id":"abc","result":{"ok":true}}'
-        assert pp._mcp_response_envelope(body, "abc")["result"]["ok"] is True
-
-    def test_sse_selects_response_for_request_id_skipping_notifications(self):
-        # A progress notification (no id) precedes the real result; an unrelated
-        # response id is also present. We must pick the one matching our id.
-        body = (
-            'event: message\ndata: {"jsonrpc":"2.0","method":"notifications/progress","params":{"p":1}}\n\n'
-            'event: message\ndata: {"jsonrpc":"2.0","id":"other","result":{"ok":false}}\n\n'
-            'event: message\ndata: {"jsonrpc":"2.0","id":"req-1","result":{"ok":true}}\n\n'
-        )
-        env = pp._mcp_response_envelope(body, "req-1")
-        assert env["result"]["ok"] is True
-
-    def test_sse_multiline_data_concatenated(self):
-        body = 'data: {"jsonrpc":"2.0","id":"x",\ndata: "result":{"n":42}}\n\n'
-        assert pp._mcp_response_envelope(body, "x")["result"]["n"] == 42
-
-    def test_falls_back_to_last_result_when_id_absent(self):
-        body = '{"jsonrpc":"2.0","id":"server-chose","result":{"ok":true}}'
-        # request id doesn't match, but there's a single result → use it
-        assert pp._mcp_response_envelope(body, "mismatch")["result"]["ok"] is True
-
-    def test_empty_body(self):
-        assert pp._mcp_response_envelope("", "x") == {}
-        assert pp._mcp_response_envelope("   ", "x") == {}
-
-    def test_batched_json_array_flattened(self):
-        # Streamable HTTP may batch messages into a JSON array.
-        body = ('[{"jsonrpc":"2.0","method":"notifications/progress"},'
-                '{"jsonrpc":"2.0","id":"req-9","result":{"ok":true}}]')
-        assert pp._mcp_response_envelope(body, "req-9")["result"]["ok"] is True
-
-    def test_batched_sse_data_array_flattened(self):
-        body = 'data: [{"jsonrpc":"2.0","id":"a","result":{"n":1}}]\n\n'
-        assert pp._mcp_response_envelope(body, "a")["result"]["n"] == 1
-
-
-# ─── _mcp_payload ────────────────────────────────────────────────────────────
-
-class TestMcpPayload:
-    def test_prefers_structured_content(self):
-        env = {"result": {"structuredContent": {"results": [{"url": "u"}]},
-                          "content": [{"type": "text", "text": "ignored"}]}}
-        assert pp._mcp_payload(env) == {"results": [{"url": "u"}]}
-
-    def test_parses_text_block_json(self):
-        inner = {"search_id": "s1", "results": [{"url": "u", "title": "t"}]}
-        env = {"result": {"content": [{"type": "text", "text": json.dumps(inner)}]}}
-        assert pp._mcp_payload(env)["search_id"] == "s1"
-
-    def test_raises_on_jsonrpc_error(self):
-        with pytest.raises(RuntimeError, match="Parallel MCP error"):
-            pp._mcp_payload({"error": {"code": -32000, "message": "boom"}})
-
-    def test_raises_on_tool_iserror(self):
-        with pytest.raises(RuntimeError, match="Parallel MCP tool error"):
-            pp._mcp_payload({"result": {"isError": True, "content": []}})
-
-
-# ─── _mcp_web_search (mocked transport) ──────────────────────────────────────
-
-class _FakeResponse:
-    def __init__(self, *, text="", headers=None):
-        self.text = text
-        self.headers = headers or {}
-
-    def raise_for_status(self):
-        return None
-
-
-class _FakeClient:
-    """Stands in for httpx.Client: replays init → ack → tools/call."""
-
-    def __init__(self, search_payload, init_session_id="server-sid"):
-        self._search_payload = search_payload
-        self._init_session_id = init_session_id
-        self.calls = []
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, *exc):
-        return False
-
-    def post(self, url, headers=None, json=None):
-        self.calls.append({"headers": headers, "json": json})
-        req = json or {}
-        method = req.get("method")
-        req_id = req.get("id")
-        if method == "initialize":
-            # Echo the request id, as the real server does.
-            return _FakeResponse(
-                text=json_dumps({"jsonrpc": "2.0", "id": req_id,
-                                 "result": {"protocolVersion": "2099-01-01"}}),
-                headers=(
-                    {"mcp-session-id": self._init_session_id}
-                    if self._init_session_id is not None
-                    else {}
-                ),
-            )
-        if method == "notifications/initialized":
-            return _FakeResponse(text="")
-        # tools/call
-        envelope = {"jsonrpc": "2.0", "id": req_id, "result": {
-            "content": [{"type": "text", "text": json_dumps(self._search_payload)}],
-        }}
-        return _FakeResponse(text=json_dumps(envelope))
-
-
-def json_dumps(obj):
-    return json.dumps(obj)
-
-
-class TestMcpWebSearch:
-    def _payload(self, n):
-        return {"search_id": "s", "results": [
-            {"url": f"https://ex/{i}", "title": f"t{i}",
-             "excerpts": [f"a{i}", f"b{i}"]}
-            for i in range(n)
-        ]}
-
-    def test_returns_standard_shape_and_handshake(self):
-        fake = _FakeClient(self._payload(3))
-        with patch.object(pp.httpx, "Client", return_value=fake):
-            out = pp._mcp_web_search("hello", limit=5, api_key=None)
-
-        assert out["success"] is True
-        # Free-tier results credit Parallel.
-        assert "Parallel" in out["attribution"]
-        web = out["data"]["web"]
-        assert [r["position"] for r in web] == [1, 2, 3]
-        assert web[0]["url"] == "https://ex/0"
-        assert web[0]["description"] == "a0 b0"  # excerpts joined
-        # handshake order
-        methods = [c["json"].get("method") for c in fake.calls]
-        assert methods == ["initialize", "notifications/initialized", "tools/call"]
-        # session id from the initialize response header is reused
-        assert fake.calls[-1]["headers"]["Mcp-Session-Id"] == "server-sid"
-
-    def test_stateless_server_no_session_header_not_invented(self):
-        # A stateless Streamable-HTTP server may omit mcp-session-id on
-        # initialize; we must NOT invent one (sending an unissued session id can
-        # get follow-up requests rejected). The follow-ups carry no header.
-        fake = _FakeClient(self._payload(1), init_session_id=None)
-        with patch.object(pp.httpx, "Client", return_value=fake):
-            out = pp._mcp_web_search("hello", limit=5, api_key=None)
-        assert out["success"] is True
-        follow_ups = [c for c in fake.calls if c["json"].get("method") != "initialize"]
-        assert follow_ups, "expected notifications/initialized + tools/call"
-        assert all("Mcp-Session-Id" not in c["headers"] for c in follow_ups)
-        # anonymous → no Authorization on any call
-        assert all("Authorization" not in c["headers"] for c in fake.calls)
-        # tools/call mirrors query into objective + search_queries
-        args = fake.calls[-1]["json"]["params"]["arguments"]
-        assert args["objective"] == "hello"
-        assert args["search_queries"] == ["hello"]
-
-    def test_limit_is_applied_client_side(self):
-        fake = _FakeClient(self._payload(10))
-        with patch.object(pp.httpx, "Client", return_value=fake):
-            out = pp._mcp_web_search("q", limit=2, api_key=None)
-        assert len(out["data"]["web"]) == 2
-
-    def test_bearer_attached_when_key_present(self):
-        fake = _FakeClient(self._payload(1))
-        with patch.object(pp.httpx, "Client", return_value=fake):
-            pp._mcp_web_search("q", limit=1, api_key="pk-live")
-        assert all(c["headers"]["Authorization"] == "Bearer pk-live" for c in fake.calls)
-
-    def test_negotiated_protocol_version_echoed_post_init(self):
-        fake = _FakeClient(self._payload(1))
-        with patch.object(pp.httpx, "Client", return_value=fake):
-            pp._mcp_web_search("q", limit=1, api_key=None)
-        # initialize request doesn't carry the (not-yet-negotiated) version...
-        assert "MCP-Protocol-Version" not in fake.calls[0]["headers"]
-        # ...but notifications/initialized and tools/call echo the negotiated one.
-        assert fake.calls[1]["headers"]["MCP-Protocol-Version"] == "2099-01-01"
-        assert fake.calls[-1]["headers"]["MCP-Protocol-Version"] == "2099-01-01"
-
-
-# ─── provider.search keyless routing ─────────────────────────────────────────
-
-class TestProviderKeylessSearch:
-    def test_search_without_key_uses_mcp(self, monkeypatch):
-        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
-        captured = {}
-
-        def _fake(query, limit, api_key):
-            captured.update(query=query, limit=limit, api_key=api_key)
-            return {"success": True, "data": {"web": []}}
-
-        monkeypatch.setattr(pp, "_mcp_web_search", _fake)
-        out = pp.ParallelWebSearchProvider().search("kittens", limit=4)
-        assert out["success"] is True
-        assert captured == {"query": "kittens", "limit": 4, "api_key": None}
-
-    def test_is_available_reflects_key(self, monkeypatch):
-        # is_available() gates the registry's active-provider walk + picker, so
-        # it's key-based (keyless dispatch is handled by _get_backend, not this).
-        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
-        assert pp.ParallelWebSearchProvider().is_available() is False
-        monkeypatch.setenv("PARALLEL_API_KEY", "k")
-        assert pp.ParallelWebSearchProvider().is_available() is True
-
-
-# ─── web_fetch (keyless extract) ─────────────────────────────────────────────
-
-class TestMcpWebFetch:
-    def _payload(self, urls):
-        return {"extract_id": "e1", "results": [
-            {"url": u, "title": f"T{i}", "publish_date": None,
-             "excerpts": [f"chunk-a-{i}", f"chunk-b-{i}"]}
-            for i, u in enumerate(urls)
-        ]}
-
-    def test_maps_to_extract_shape(self):
-        urls = ["https://a.test", "https://b.test"]
-        fake = _FakeClient(self._payload(urls))
-        with patch.object(pp.httpx, "Client", return_value=fake):
-            out = pp._mcp_web_fetch(urls, api_key=None)
-        assert [r["url"] for r in out] == urls
-        assert out[0]["content"] == "chunk-a-0\n\nchunk-b-0"
-        assert out[0]["raw_content"] == out[0]["content"]
-        assert out[0]["metadata"] == {"sourceURL": "https://a.test", "title": "T0"}
-        # tools/call targeted web_fetch, requesting full page bodies.
-        args = fake.calls[-1]["json"]["params"]
-        assert args["name"] == "web_fetch"
-        assert args["arguments"]["urls"] == urls
-        assert args["arguments"]["full_content"] is True
-        assert args["arguments"]["session_id"].startswith(f"{pp._MCP_CLIENT_NAME}-")
-
-    def test_prefers_full_content_over_excerpts(self):
-        payload = {"results": [
-            {"url": "https://a.test", "title": "T",
-             "excerpts": ["snippet"], "full_content": "the entire page body"},
-        ]}
-        fake = _FakeClient(payload)
-        with patch.object(pp.httpx, "Client", return_value=fake):
-            out = pp._mcp_web_fetch(["https://a.test"], api_key=None)
-        assert out[0]["content"] == "the entire page body"
-
-    def test_missing_url_becomes_error_entry(self):
-        # Server returns only one of the two requested URLs.
-        fake = _FakeClient(self._payload(["https://a.test"]))
-        with patch.object(pp.httpx, "Client", return_value=fake):
-            out = pp._mcp_web_fetch(["https://a.test", "https://missing.test"], api_key=None)
-        assert len(out) == 2
-        missing = [r for r in out if r["url"] == "https://missing.test"][0]
-        assert "error" in missing
-        assert missing["content"] == ""
-
-    def test_preserves_order_and_duplicate_inputs(self):
-        # MCP returns each unique URL once; output must still be one row per
-        # input, in order, including the duplicate.
-        fake = _FakeClient(self._payload(["https://a.test", "https://b.test"]))
-        urls = ["https://b.test", "https://a.test", "https://b.test"]
-        with patch.object(pp.httpx, "Client", return_value=fake):
-            out = pp._mcp_web_fetch(urls, api_key=None)
-        assert [r["url"] for r in out] == urls  # one row per input, in order
-        assert all("error" not in r for r in out)  # all three resolved
-
-    def test_extract_without_key_uses_web_fetch(self, monkeypatch):
-        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
-        captured = {}
-
-        def _fake(urls, api_key):
-            captured.update(urls=list(urls), api_key=api_key)
-            return [{"url": urls[0], "title": "", "content": "x",
-                     "raw_content": "x", "metadata": {}}]
-
-        monkeypatch.setattr(pp, "_mcp_web_fetch", _fake)
-        out = asyncio.run(pp.ParallelWebSearchProvider().extract(["https://x.test"]))
-        assert out[0]["content"] == "x"
-        assert captured == {"urls": ["https://x.test"], "api_key": None}
-
-
-# ─── keyed v1 REST search ────────────────────────────────────────────────────
-
-class TestKeyedV1Search:
-    def test_passes_max_results_and_omits_branding(self, monkeypatch):
-        monkeypatch.setenv("PARALLEL_API_KEY", "pk-live")
-        monkeypatch.delenv("PARALLEL_SEARCH_MODE", raising=False)
-        captured = {}
-
-        class _Res:
-            def __init__(self, url):
-                self.url, self.title, self.excerpts = url, "T", ["x"]
-
-        class _Resp:
-            results = [_Res(f"https://r/{i}") for i in range(10)]
-
-        class _Client:
-            def search(self, **kw):
-                captured.update(kw)
-                return _Resp()
-
-        monkeypatch.setattr(pp, "_get_sync_client", lambda: _Client())
-        out = pp.ParallelWebSearchProvider().search("q", limit=7)
-
-        assert out["success"] is True
-        # honors the caller's limit via advanced_settings.max_results
-        assert captured["advanced_settings"] == {"max_results": 7}
-        assert captured["mode"] == "advanced"            # v1 default
-        assert captured["session_id"].startswith(f"{pp._MCP_CLIENT_NAME}-")  # per-call id
-        assert len(out["data"]["web"]) == 7              # client-side slice
-        # paid path: no free-tier attribution, no [Parallel] label signal
-        assert "attribution" not in out
-        assert "provider" not in out
-
-
-# ─── v1 search mode mapping ──────────────────────────────────────────────────
-
-class TestResolveSearchMode:
-    @pytest.mark.parametrize("env,expected", [
-        (None, "advanced"),        # default
-        ("advanced", "advanced"),
-        ("basic", "basic"),
-        ("fast", "basic"),         # legacy → basic
-        ("one-shot", "basic"),     # legacy → basic
-        ("agentic", "advanced"),   # legacy → advanced
-        ("garbage", "advanced"),   # invalid → default
-        ("BASIC", "basic"),        # case-insensitive
-    ])
-    def test_mode_mapping(self, monkeypatch, env, expected):
-        if env is None:
-            monkeypatch.delenv("PARALLEL_SEARCH_MODE", raising=False)
-        else:
-            monkeypatch.setenv("PARALLEL_SEARCH_MODE", env)
-        assert pp._resolve_search_mode() == expected
--- a/tests/plugins/web/test_web_search_provider_plugins.py
+++ b/tests/plugins/web/test_web_search_provider_plugins.py
@ -193,16 +193,11 @@ class TestIsAvailable:
        assert p.is_available() is True

    def test_parallel_requires_api_key(self, monkeypatch: pytest.MonkeyPatch) -> None:
-        """is_available() is key-based — it gates the registry's active-provider
-        walk/picker. (Keyless search/extract still work via the free MCP through
-        _get_backend's terminal default, independent of this flag.)
-        """
        _ensure_plugins_loaded()
        from agent.web_search_registry import get_provider

        p = get_provider("parallel")
        assert p is not None
-        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
        assert p.is_available() is False
        monkeypatch.setenv("PARALLEL_API_KEY", "real")
        assert p.is_available() is True
@ -427,33 +422,17 @@ class TestErrorResponseShapes:
        assert result.get("success") is False
        assert "error" in result

-    def test_parallel_extract_keyless_uses_mcp_web_fetch(
-        self, monkeypatch: pytest.MonkeyPatch
-    ) -> None:
-        """Without a key, extract routes to the free MCP web_fetch tool rather
-        than erroring. The MCP transport is mocked so the test stays offline."""
+    def test_parallel_extract_returns_per_url_errors_when_unconfigured(self) -> None:
        _ensure_plugins_loaded()
        from agent.web_search_registry import get_provider
-        import plugins.web.parallel.provider as parallel_provider
-
-        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
-        captured = {}
-
-        def _fake_fetch(urls, api_key):
-            captured["urls"] = list(urls)
-            captured["api_key"] = api_key
-            return [{"url": urls[0], "title": "Example", "content": "body",
-                     "raw_content": "body", "metadata": {"sourceURL": urls[0]}}]
-
-        monkeypatch.setattr(parallel_provider, "_mcp_web_fetch", _fake_fetch)

        p = get_provider("parallel")
        assert p is not None
        result = asyncio.run(p.extract(["https://example.com"]))
        assert isinstance(result, list)
+        assert len(result) == 1
+        assert "error" in result[0]
        assert result[0]["url"] == "https://example.com"
-        assert result[0]["content"] == "body"
-        assert captured == {"urls": ["https://example.com"], "api_key": None}

    def test_firecrawl_extract_returns_per_url_errors_when_unconfigured(self) -> None:
        _ensure_plugins_loaded()
--- a/tests/tools/test_web_keyless_default_fallback.py
+++ b/tests/tools/test_web_keyless_default_fallback.py
@ -1,100 +0,0 @@
-"""Regression: the keyless Parallel web default must survive a failed sweep.
-
-``web_search`` / ``web_extract`` are documented to work out of the box with
-zero setup via the bundled keyless Parallel free-MCP backend. That guarantee
-only holds if the bundled ``plugins/web/*`` providers are registered in
-``agent.web_search_registry``. The dispatch triggers the general plugin sweep
-(:func:`hermes_cli.plugins._ensure_plugins_discovered`) to do that — but the
-sweep can finish without registering them (its exception swallowed as a
-warning, a packaged layout where it ran before the bundled tree was
-importable, or a stale empty-discovery cache). When that happened, *both*
-tools dead-ended on "No web {search,extract} provider configured" even though
-no setup should be needed.
-
-These tests pin the invariant that :func:`tools.web_tools._ensure_web_plugins_loaded`
-guarantees the keyless default is registered regardless of the sweep's outcome,
-and that the direct-registration fallback honors an explicit ``plugins.disabled``
-entry. Real imports from the bundled plugin modules — no provider mocking.
-"""
-from __future__ import annotations
-
-import pytest
-
-import agent.web_search_registry as reg
-import hermes_cli.plugins as plugins
-from tools import web_tools
-
-
-@pytest.fixture(autouse=True)
-def _clean_registry():
-    reg._reset_for_tests()
-    yield
-    reg._reset_for_tests()
-
-
-def _boom(*_a, **_k):
-    raise RuntimeError("discovery boom")
-
-
-def test_keyless_default_registered_when_discovery_raises(monkeypatch):
-    """A swallowed discovery failure must not strand the keyless default."""
-    monkeypatch.setattr(plugins, "_ensure_plugins_discovered", _boom)
-    assert reg.get_provider("parallel") is None
-
-    web_tools._ensure_web_plugins_loaded()
-
-    parallel = reg.get_provider("parallel")
-    assert parallel is not None, "keyless Parallel default not restored"
-    # It is the universal keyless default precisely because it does both.
-    assert parallel.supports_search()
-    assert parallel.supports_extract()
-
-
-def test_fallback_registers_full_bundled_set(monkeypatch):
-    """The fix covers the whole bundled provider class, not just parallel."""
-    monkeypatch.setattr(plugins, "_ensure_plugins_discovered", _boom)
-
-    web_tools._ensure_web_plugins_loaded()
-
-    names = {p.name for p in reg.list_providers()}
-    # Every bundled backend a user might have configured should be reachable
-    # again, so an explicit ``web.extract_backend: firecrawl`` etc. resolves.
-    for expected in ("parallel", "firecrawl", "tavily", "exa"):
-        assert expected in names, f"{expected} missing after fallback"
-
-
-def test_fallback_honors_explicit_disable(monkeypatch):
-    """A backend the user turned off via plugins.disabled stays off."""
-    monkeypatch.setattr(plugins, "_get_disabled_plugins", lambda: {"web-parallel"})
-
-    web_tools._register_bundled_web_providers_directly()
-
-    names = {p.name for p in reg.list_providers()}
-    assert "parallel" not in names, "explicit disable was ignored"
-    # Other bundled backends are unaffected by the parallel disable.
-    assert "tavily" in names
-
-
-def test_fallback_is_noop_when_discovery_already_registered(monkeypatch):
-    """Healthy path: don't pay for the direct sweep when parallel is present."""
-    # Pretend the general sweep already registered the keyless default.
-    import importlib
-
-    class _Ctx:
-        def register_web_search_provider(self, provider):
-            reg.register_provider(provider)
-
-    importlib.import_module("plugins.web.parallel").register(_Ctx())
-    monkeypatch.setattr(plugins, "_ensure_plugins_discovered", lambda *a, **k: None)
-
-    calls = {"n": 0}
-    real = web_tools._register_bundled_web_providers_directly
-
-    def _spy():
-        calls["n"] += 1
-        real()
-
-    monkeypatch.setattr(web_tools, "_register_bundled_web_providers_directly", _spy)
-    web_tools._ensure_web_plugins_loaded()
-
-    assert calls["n"] == 0, "direct-registration ran on the healthy path"
--- a/tests/tools/test_web_providers.py
+++ b/tests/tools/test_web_providers.py
@ -167,21 +167,6 @@ class TestPerCapabilityBackendSelection:
        monkeypatch.setenv("TAVILY_API_KEY", "test-key")
        assert web_tools._get_search_backend() == "tavily"

-    def test_explicit_extract_backend_honored_when_unavailable(self, monkeypatch):
-        """An explicit per-capability backend is honored even with no creds, so
-        its setup error surfaces instead of silently rerouting to the keyless
-        Parallel default (which would send user URLs to a different provider)."""
-        from tools import web_tools
-
-        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
-            "extract_backend": "firecrawl",
-        })
-        for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "FIRECRAWL_GATEWAY_URL"):
-            monkeypatch.delenv(key, raising=False)
-        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False, raising=False)
-        # Resolves to firecrawl (not parallel) despite firecrawl being unavailable.
-        assert web_tools._get_extract_backend() == "firecrawl"
-
    def test_falls_back_to_generic_backend_when_extract_backend_empty(self, monkeypatch):
        from tools import web_tools

@ -192,7 +177,7 @@ class TestPerCapabilityBackendSelection:
        monkeypatch.setenv("PARALLEL_API_KEY", "test-key")
        assert web_tools._get_extract_backend() == "parallel"

-    def test_explicit_search_backend_honored_when_unavailable(self, monkeypatch):
+    def test_search_backend_ignored_when_not_available(self, monkeypatch):
        from tools import web_tools

        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
@ -201,10 +186,8 @@ class TestPerCapabilityBackendSelection:
        })
        monkeypatch.delenv("EXA_API_KEY", raising=False)
        monkeypatch.setenv("FIRECRAWL_API_KEY", "fc-key")
-        # The explicit per-capability choice (exa) is honored even though it's
-        # unavailable, so its setup error surfaces — we don't silently reroute
-        # to the shared backend (or the keyless Parallel default).
-        assert web_tools._get_search_backend() == "exa"
+        # Should fall back to firecrawl since exa isn't configured
+        assert web_tools._get_search_backend() == "firecrawl"

    def test_fully_backward_compatible_with_web_backend_only(self, monkeypatch):
        from tools import web_tools
@ -308,55 +291,26 @@ class TestUnconfiguredErrorEnvelopeParity:
        ):
            monkeypatch.delenv(k, raising=False)

-    def test_extract_empty_urls_does_not_raise(self, monkeypatch):
-        """Regression: empty (or fully SSRF-blocked) URL sets skip the dispatch
-        branch; the free-Parallel flag must still be initialized so the tool
-        returns an error envelope instead of UnboundLocalError."""
-        import asyncio
-        from tools import web_tools
-        self._clear_web_creds(monkeypatch)
-        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
-        out = asyncio.run(web_tools.web_extract_tool([], "markdown"))
-        # The key assertion is that it returns a normal error envelope (a
-        # string) rather than raising UnboundLocalError.
-        assert isinstance(out, str)
-        result = json.loads(out)
-        assert "error" in result
-
-    def test_unconfigured_search_falls_back_to_free_parallel(self, monkeypatch):
-        """``web_search_tool`` with no creds routes to Parallel's free Search
-        MCP rather than erroring. The MCP transport is mocked so the test
-        stays offline; we assert dispatch landed on parallel and returned the
-        standard search envelope.
+    def test_unconfigured_search_emits_top_level_error(self, monkeypatch):
+        """``web_search_tool`` with no creds returns ``{"error": "Error searching web: ..."}``
+        — matching main's ``tool_error()`` envelope, not a per-result shape.
        """
        from tools import web_tools
-        import plugins.web.parallel.provider as parallel_provider

        self._clear_web_creds(monkeypatch)
+        # Reset firecrawl client cache so the unconfigured state is re-evaluated
        monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
        monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
+        monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: False)
        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})

-        captured = {}
-
-        def _fake_mcp(query, limit, api_key):
-            captured["query"] = query
-            captured["api_key"] = api_key
-            return {
-                "success": True,
-                "data": {"web": [
-                    {"url": "https://example.com", "title": "Example",
-                     "description": "hit", "position": 1},
-                ]},
-            }
-
-        monkeypatch.setattr(parallel_provider, "_mcp_web_search", _fake_mcp)
-
        result = json.loads(web_tools.web_search_tool("hello world", limit=3))
-        assert result.get("success") is True, f"expected success, got {result}"
-        assert result["data"]["web"][0]["url"] == "https://example.com"
-        # Keyless path: dispatched to parallel with no Bearer token.
-        assert captured == {"query": "hello world", "api_key": None}
+        assert "error" in result, f"expected top-level 'error' key, got {result}"
+        # ``Error searching web:`` prefix comes from web_tools' top-level except handler
+        assert "Error searching web:" in result["error"]
+        assert "FIRECRAWL_API_KEY" in result["error"]
+        # No per-result burying
+        assert "results" not in result


 class TestDispatchersTriggerPluginDiscovery:
--- a/tests/tools/test_web_providers_ddgs.py
+++ b/tests/tools/test_web_providers_ddgs.py
@ -190,11 +190,7 @@ class TestDDGSBackendWiring:
        monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
        assert web_tools._get_backend() == "exa"

-    def test_auto_detect_prefers_keyless_parallel_over_ddgs(self, monkeypatch):
-        # With no credentials, keyless Parallel is the auto-detect default even
-        # when the ddgs package is installed — ddgs is search-only (can't
-        # extract), so Parallel is preferred so both search and extract work.
-        # ddgs remains reachable via an explicit web.backend=ddgs.
+    def test_auto_detect_picks_ddgs_as_last_resort(self, monkeypatch):
        from tools import web_tools
        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
        for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "PARALLEL_API_KEY",
@ -202,7 +198,7 @@ class TestDDGSBackendWiring:
            monkeypatch.delenv(key, raising=False)
        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
        monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
-        assert web_tools._get_backend() == "parallel"
+        assert web_tools._get_backend() == "ddgs"

    def test_check_web_api_key_true_when_ddgs_configured(self, monkeypatch):
        from tools import web_tools
--- a/tests/tools/test_web_providers_searxng.py
+++ b/tests/tools/test_web_providers_searxng.py
@ -313,9 +313,7 @@ class TestCheckWebApiKey:
        )
        assert web_tools.check_web_api_key() is True

-    def test_no_credentials_usable_via_free_parallel(self, monkeypatch):
-        """No credentials → check_web_api_key True: the keyless Parallel free MCP
-        services calls, so web is usable out of the box."""
+    def test_no_credentials_fails(self, monkeypatch):
        from tools import web_tools
        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
        monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False)
@ -327,7 +325,7 @@ class TestCheckWebApiKey:
        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
        monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)
        monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: False)
-        assert web_tools.check_web_api_key() is True
+        assert web_tools.check_web_api_key() is False


 # ---------------------------------------------------------------------------
--- a/tests/tools/test_web_tools_config.py
+++ b/tests/tools/test_web_tools_config.py
@ -384,14 +384,12 @@ class TestBackendSelection:
             patch.dict(os.environ, {"FIRECRAWL_API_KEY": "fc-test"}):
            assert _get_backend() == "firecrawl"

-    def test_fallback_no_keys_defaults_to_parallel(self):
-        """No credentials, no config → 'parallel' (free Search MCP works
-        keyless). Selection is purely credential-based."""
+    def test_fallback_no_keys_defaults_to_firecrawl(self):
+        """No keys, no config → 'firecrawl' (will fail at client init)."""
        from tools.web_tools import _get_backend
        with patch("tools.web_tools._load_web_config", return_value={}), \
-             patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
             patch("tools.web_tools._ddgs_package_importable", return_value=False):
-            assert _get_backend() == "parallel"
+            assert _get_backend() == "firecrawl"

    def test_invalid_config_falls_through_to_fallback(self):
        """web.backend=invalid → ignored, uses key-based fallback."""
@ -626,73 +624,9 @@ class TestCheckWebApiKey:
            from tools.web_tools import check_web_api_key
            assert check_web_api_key() is True

-    def test_no_keys_usable_via_free_parallel(self):
-        """No credentials → check_web_api_key True: selection resolves to the
-        keyless Parallel free MCP, which genuinely services calls (web works out
-        of the box). check_web_api_key is a usability probe, not a key check."""
+    def test_no_keys_returns_false(self):
        from tools.web_tools import check_web_api_key
-        with patch("tools.web_tools._load_web_config", return_value={}), \
-             patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
-             patch("tools.web_tools._ddgs_package_importable", return_value=False), \
-             patch.dict(os.environ, {}, clear=False):
-            for k in ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
-                      "TAVILY_API_KEY", "EXA_API_KEY", "SEARXNG_URL", "BRAVE_SEARCH_API_KEY"):
-                os.environ.pop(k, None)
-            assert check_web_api_key() is True
-
-    def test_typo_extract_backend_not_masked_by_parallel(self):
-        """A typo'd per-capability backend is honored (so dispatch errors)
-        rather than silently falling through to keyless Parallel."""
-        from tools.web_tools import _get_extract_backend, check_web_api_key
-        with patch("tools.web_tools._load_web_config",
-                   return_value={"extract_backend": "parrallel"}):
-            assert _get_extract_backend() == "parrallel"   # not "parallel"
-            assert check_web_api_key() is False            # unknown → unusable
-
-    def test_keyless_parallel_unusable_when_provider_disabled(self):
-        """If the bundled web-parallel provider is disabled/unregistered, the
-        keyless free-MCP path must NOT report web as usable — otherwise setup is
-        skipped but web tools fail at runtime with no provider."""
-        from tools.web_tools import check_web_api_key
-        with patch("tools.web_tools._load_web_config", return_value={}), \
-             patch("tools.web_tools._parallel_provider_registered", return_value=False), \
-             patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
-             patch("tools.web_tools.check_firecrawl_api_key", return_value=False), \
-             patch("tools.web_tools._ddgs_package_importable", return_value=False), \
-             patch.dict(os.environ, {}, clear=False):
-            for var in (
-                "PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
-                "TAVILY_API_KEY", "EXA_API_KEY", "BRAVE_SEARCH_API_KEY", "SEARXNG_URL",
-            ):
-                os.environ.pop(var, None)
-            assert check_web_api_key() is False
-
-    def test_extract_autodetect_skips_search_only_for_keyless_parallel(self):
-        """A search-only env credential (SEARXNG_URL) must not shadow the keyless
-        Parallel free-MCP extract fallback: extract auto-detect skips search-only
-        backends, so _get_extract_backend resolves to parallel (which can fetch),
-        while search auto-detect still prefers the configured searxng."""
-        from tools.web_tools import _get_extract_backend, _get_search_backend
-        with patch("tools.web_tools._load_web_config", return_value={}), \
-             patch.dict(os.environ, {}, clear=False):
-            for var in (
-                "PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
-                "TAVILY_API_KEY", "EXA_API_KEY", "BRAVE_SEARCH_API_KEY",
-            ):
-                os.environ.pop(var, None)
-            os.environ["SEARXNG_URL"] = "http://localhost:8080"
-            with patch("tools.web_tools._is_tool_gateway_ready", return_value=False):
-                assert _get_search_backend() == "searxng"
-                assert _get_extract_backend() == "parallel"
-
-    def test_configured_but_unavailable_backend_reports_unusable(self):
-        """An explicitly configured backend with no creds (exa, no key) →
-        check_web_api_key False so diagnostics flag the misconfiguration —
-        even though the tools stay registered."""
-        from tools.web_tools import check_web_api_key
-        with patch("tools.web_tools._load_web_config", return_value={"backend": "exa"}), \
-             patch.dict(os.environ, {}, clear=False):
-            os.environ.pop("EXA_API_KEY", None)
+        with patch("tools.web_tools._ddgs_package_importable", return_value=False):
            assert check_web_api_key() is False

    def test_both_keys_returns_true(self):
@ -756,18 +690,12 @@ class TestCheckWebApiKey:

        assert refresh_calls == []

-    def test_web_tools_registered_even_when_configured_backend_unavailable(self):
-        # Registration is unconditional (web_tools_registered) so an explicitly
-        # configured but unavailable backend (exa without EXA_API_KEY) keeps the
-        # tools registered to surface exa's setup error at call time — while the
-        # readiness probe (check_web_api_key) honestly reports not-configured.
-        from tools.web_tools import web_tools_registered, check_web_api_key
-        assert web_tools_registered() is True
-        with patch("tools.web_tools._load_web_config", return_value={"backend": "exa"}), \
-             patch.dict(os.environ, {}, clear=False):
-            os.environ.pop("EXA_API_KEY", None)
-            assert web_tools_registered() is True
-            assert check_web_api_key() is False
+    def test_configured_backend_must_match_available_provider(self):
+        with patch("tools.web_tools._load_web_config", return_value={"backend": "parallel"}), \
+             patch("tools.web_tools._read_nous_access_token", return_value="nous-token"), \
+             patch.dict(os.environ, {"FIRECRAWL_GATEWAY_URL": "http://127.0.0.1:3002"}, clear=False):
+            from tools.web_tools import check_web_api_key
+            assert check_web_api_key() is False

    def test_configured_firecrawl_backend_accepts_managed_gateway(self):
        with patch("tools.web_tools._load_web_config", return_value={"backend": "firecrawl"}):
--- a/tools/lazy_deps.py
+++ b/tools/lazy_deps.py
@ -90,7 +90,7 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
    # ─── Web search backends ───────────────────────────────────────────────
    "search.exa": ("exa-py==2.10.2",),
    "search.firecrawl": ("firecrawl-py==4.17.0",),
-    "search.parallel": ("parallel-web==0.6.0",),
+    "search.parallel": ("parallel-web==0.4.2",),

    # ─── TTS providers ─────────────────────────────────────────────────────
    # Pinned to exact versions to match pyproject.toml's no-ranges policy
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@ -141,35 +141,15 @@ def _load_web_config() -> dict:
    except (ImportError, Exception):
        return {}

-# Recognized web backend names (config values accepted in ``web.backend`` /
-# ``web.search_backend`` / ``web.extract_backend``). Kept as a single source of
-# truth for config validation across the selection helpers.
-_KNOWN_WEB_BACKENDS = frozenset(
-    {"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}
-)
-
-# Backends that only service web_search (their provider's ``supports_extract()``
-# is False). They are skipped during *extract* auto-detect so a search-only
-# credential (e.g. SEARXNG_URL) does not shadow the keyless Parallel free-MCP
-# fallback, which would otherwise leave web_extract broken on a no-key install.
-_SEARCH_ONLY_BACKENDS = frozenset({"searxng", "brave-free", "ddgs", "xai"})
-
-
-def _get_backend(capability: str = "search") -> str:
+def _get_backend() -> str:
    """Determine which web backend to use (shared fallback).

    Reads ``web.backend`` from config.yaml (set by ``hermes tools``).
    Falls back to whichever API key is present for users who configured
    keys manually without running setup.
-
-    ``capability`` ("search" | "extract") only affects auto-detect: for
-    ``extract`` we skip search-only backends (``_SEARCH_ONLY_BACKENDS``) so a
-    search-only credential never shadows the keyless Parallel free-MCP extract
-    fallback. An explicit ``web.backend`` value is honored as-is (explicit wins,
-    surfacing that backend's own search-only error rather than rerouting).
    """
    configured = (_load_web_config().get("backend") or "").lower().strip()
-    if configured in _KNOWN_WEB_BACKENDS:
+    if configured in {"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}:
        return configured

    # Fallback for manual / legacy config — pick the highest-priority
@ -178,8 +158,7 @@ def _get_backend(capability: str = "search") -> str:
    # pre-empted by a Nous OAuth token whose subscription tier may not
    # actually grant web-search access (the gateway then fails at runtime
    # with "no subscription" and the tool returns an error to the agent
-    # without falling back). Free-tier backends (searxng / brave-free /
-    # keyless parallel / ddgs) trail the keyed ones.
+    # without falling back). Free-tier backends trail the paid ones.
    backend_candidates = (
        ("tavily", _has_env("TAVILY_API_KEY")),
        ("exa", _has_env("EXA_API_KEY")),
@ -188,24 +167,13 @@ def _get_backend(capability: str = "search") -> str:
        ("firecrawl", _is_tool_gateway_ready()),
        ("searxng", _has_env("SEARXNG_URL")),
        ("brave-free", _has_env("BRAVE_SEARCH_API_KEY")),
-        # Keyless Parallel free MCP — always available, the intended no-key
-        # default for both search and extract. Ahead of ddgs (search-only, so it
-        # can't service web_extract); ddgs stays reachable via web.backend=ddgs.
-        ("parallel", True),
        ("ddgs", _ddgs_package_importable()),
    )
    for backend, available in backend_candidates:
-        if not available:
-            continue
-        # For extract, skip search-only backends so the keyless Parallel
-        # free-MCP fallback (which can fetch URLs) is reached instead.
-        if capability == "extract" and backend in _SEARCH_ONLY_BACKENDS:
-            continue
-        return backend
+        if available:
+            return backend

-    # Defensive terminal (the keyless ``parallel`` candidate above is always
-    # available, so this is effectively unreachable).
-    return "parallel"
+    return "firecrawl"  # default (backward compat)


 def _get_search_backend() -> str:
@ -236,19 +204,14 @@ def _get_extract_backend() -> str:
 def _get_capability_backend(capability: str) -> str:
    """Shared helper for per-capability backend selection.

-    Reads ``web.{capability}_backend`` from config. Any explicit value is
-    honored **regardless of availability** — including unrecognized typos like
-    ``parrallel`` — so the dispatcher surfaces that backend's own setup/config
-    error rather than silently rerouting to the keyless Parallel default (which
-    would send user queries to a different provider and hide the
-    misconfiguration). This matches ``web_search_registry``'s "explicit config
-    wins" rule. Only an *unset* value falls through to ``_get_backend()``.
+    Returns the lower-cased, stripped ``web.{capability}_backend`` config value when it
+    is set and ``_is_backend_available()`` accepts it; else the shared ``_get_backend()``.
    """
    cfg = _load_web_config()
    specific = (cfg.get(f"{capability}_backend") or "").lower().strip()
-    if specific:
+    if specific and _is_backend_available(specific):
        return specific
-    return _get_backend(capability)
+    return _get_backend()


 def _is_backend_available(backend: str) -> bool:
@ -256,8 +219,6 @@ def _is_backend_available(backend: str) -> bool:
    if backend == "exa":
        return _has_env("EXA_API_KEY")
    if backend == "parallel":
-        # Credential probe: True only with a real key. The keyless free-MCP
-        # fallback is handled by _get_backend()'s terminal default, not here.
        return _has_env("PARALLEL_API_KEY")
    if backend == "firecrawl":
        return check_firecrawl_api_key()
@ -810,17 +771,6 @@ def _ensure_web_plugins_loaded() -> None:
    Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` exactly:
    the underlying discovery call is idempotent and cheap on subsequent
    invocations.
-
-    Triggering discovery is necessary but not *sufficient*: the sweep can
-    finish without registering the bundled web providers (its exception
-    swallowed below as a warning, a packaged layout where discovery ran before
-    the bundled tree was importable, or a stale empty-discovery cache). When
-    that happens the registry is empty and *both* web_search and web_extract
-    dead-end on "No web {search,extract} provider configured" — even though the
-    keyless Parallel default is supposed to work with zero setup. So after
-    discovery we verify the keyless default landed and, if not, register the
-    bundled providers directly (see
-    :func:`_register_bundled_web_providers_directly`).
    """
    try:
        from hermes_cli.plugins import _ensure_plugins_discovered
@ -833,87 +783,6 @@ def _ensure_web_plugins_loaded() -> None:
        # clue in normal logs about the real cause.
        logger.warning("Web plugin discovery failed (non-fatal): %s", exc)

-    # Belt-and-suspenders: guarantee the keyless Parallel default (the
-    # documented zero-setup backend for both web_search and web_extract) is
-    # actually registered. The lookup is a cheap dict hit on the healthy path
-    # (discovery already registered it → no-op); only an empty registry pays
-    # for the direct-registration sweep.
-    try:
-        from agent.web_search_registry import get_provider
-
-        if get_provider("parallel") is None:
-            _register_bundled_web_providers_directly()
-    except Exception as exc:  # noqa: BLE001
-        logger.debug("Bundled web provider fallback check failed: %s", exc)
-
-
-def _register_bundled_web_providers_directly() -> None:
-    """Register the repo's bundled web providers without the plugin manager.
-
-    The normal path is the general plugin sweep
-    (:func:`hermes_cli.plugins._ensure_plugins_discovered`), which auto-loads
-    every ``plugins/web/<name>`` backend (they are ``kind: backend``). This
-    fallback exists for the runtimes where that sweep does not leave the web
-    registry populated — so the keyless Parallel default (and any bundled
-    backend the user explicitly configured) keeps working instead of
-    surfacing a misleading "No web provider configured" error.
-
-    Imports each bundled ``plugins/web/<name>`` package and calls its
-    ``register()`` directly against :mod:`agent.web_search_registry`. Idempotent
-    (re-register overwrites) and honors an explicit ``plugins.disabled`` entry
-    so a backend the user turned off stays off.
-    """
-    try:
-        from hermes_cli.plugins import (
-            _get_disabled_plugins,
-            get_bundled_plugins_dir,
-        )
-    except Exception as exc:  # noqa: BLE001
-        logger.debug("Bundled web provider fallback unavailable: %s", exc)
-        return
-
-    web_dir = get_bundled_plugins_dir() / "web"
-    if not web_dir.is_dir():
-        return
-
-    disabled = _get_disabled_plugins()
-
-    from agent.web_search_provider import WebSearchProvider
-    from agent.web_search_registry import register_provider
-
-    class _DirectRegistrationCtx:
-        """Minimal plugin ctx exposing only web-provider registration."""
-
-        def register_web_search_provider(self, provider) -> None:
-            if isinstance(provider, WebSearchProvider):
-                register_provider(provider)
-
-    ctx = _DirectRegistrationCtx()
-    import importlib
-
-    for child in sorted(web_dir.iterdir()):
-        if not child.is_dir():
-            continue
-        if not (child / "plugin.yaml").exists() and not (child / "plugin.yml").exists():
-            continue
-        # Respect an explicit disable — match discover_and_load's key/name
-        # check (key ``web/<dir>``; manifest name ``web-<dir-with-dashes>``).
-        if (
-            f"web/{child.name}" in disabled
-            or f"web-{child.name.replace('_', '-')}" in disabled
-        ):
-            continue
-        try:
-            module = importlib.import_module(f"plugins.web.{child.name}")
-            register_fn = getattr(module, "register", None)
-            if callable(register_fn):
-                register_fn(ctx)
-        except Exception as exc:  # noqa: BLE001
-            logger.debug(
-                "Direct registration of bundled web provider '%s' failed: %s",
-                child.name, exc,
-            )
-

 def web_search_tool(query: str, limit: int = 5) -> str:
    """
@ -1103,19 +972,11 @@ async def web_extract_tool(
            else:
                safe_urls.append(url)

-        # Tracks the free-tier Parallel extract path (no key → web_fetch via the
-        # hosted Search MCP) so we can credit Parallel in the output/UI. Bound
-        # here so empty/all-blocked inputs (which skip dispatch) stay defined.
-        _free_parallel_extract = False
-
        # Dispatch only safe URLs to the configured backend
        if not safe_urls:
            results = []
        else:
            backend = _get_extract_backend()
-            _free_parallel_extract = (
-                backend == "parallel" and not _has_env("PARALLEL_API_KEY")
-            )

            # All seven providers (brave-free, ddgs, searxng, exa, parallel,
            # tavily, firecrawl) now live as plugins. The dispatcher is a
@ -1289,14 +1150,6 @@ async def web_extract_tool(
            for r in response.get("results", [])
        ]
        trimmed_response = {"results": trimmed_results}
-        if _free_parallel_extract:
-            # Credit Parallel's free Search MCP (drives the "[Parallel]" UI tag
-            # + lets the model cite the source). Free tier only.
-            trimmed_response["provider"] = "parallel"
-            trimmed_response["attribution"] = (
-                "Extraction powered by the free Parallel Web Search MCP "
-                "(https://parallel.ai)."
-            )

        if trimmed_response.get("results") == []:
            result_json = tool_error("Content was inaccessible or not found")
@ -1328,61 +1181,16 @@ async def web_extract_tool(
        return tool_error(error_msg)


-def web_tools_registered() -> bool:
-    """Whether the web tools should be registered. Always True.
-
-    Registration is decoupled from credential readiness: with no credentials,
-    search/extract fall back to Parallel's free hosted Search MCP, and an
-    explicitly configured-but-unavailable backend must stay registered so
-    dispatch surfaces that backend's own setup error rather than the tool
-    silently vanishing. For "is web actually configured?" use
-    :func:`check_web_api_key`.
-    """
-    return True
-
-
-def _parallel_provider_registered() -> bool:
-    """True when the bundled ``web-parallel`` provider is registered/enabled.
-
-    Plugin discovery skips disabled plugins, so a disabled (``plugins.disabled``)
-    or otherwise-unregistered parallel provider yields ``None`` here.
-    """
-    _ensure_web_plugins_loaded()
-    try:
-        from agent.web_search_registry import get_provider
-
-        return get_provider("parallel") is not None
-    except Exception:  # noqa: BLE001
-        return False
-
-
-def _backend_usable(backend: str) -> bool:
-    """True when *backend* can service calls. Keyless Parallel counts (free MCP).
-
-    Unknown/typo'd backend names are not usable (so an explicit typo is reported
-    as a config problem rather than masked by the keyless fallback).
-    """
-    if backend == "parallel" and not _has_env("PARALLEL_API_KEY"):
-        # Keyless Parallel is only genuinely usable when its provider is actually
-        # registered/enabled. If web-parallel is disabled or discovery failed,
-        # report unusable so setup is not skipped and the user is not left with
-        # web tools that fail at runtime ("No web search provider configured").
-        return _parallel_provider_registered()
-    return _is_backend_available(backend)
-
-
+# Readiness probe for the web tools: covers every backend, not only Firecrawl.
 def check_web_api_key() -> bool:
-    """Usability probe: True when the selected web backends can service calls.
-
-    Probes the backends that :func:`_get_search_backend` /
-    :func:`_get_extract_backend` actually select (not just shared
-    ``web.backend``), so an explicit per-capability backend with missing
-    credentials — or a typo'd name — reports unusable instead of being masked by
-    the keyless Parallel fallback. Keyless Parallel itself genuinely services
-    calls, so a zero-setup install reports usable. Distinct from
-    :func:`web_tools_registered` (always True — whether the tool is offered).
-    """
-    return _backend_usable(_get_search_backend()) and _backend_usable(_get_extract_backend())
+    """Return True if the configured ``web.backend`` is available, or any backend when unset/unknown."""
+    # ``or ""`` tolerates an explicit ``backend: null`` in config, matching ``_get_backend()``.
+    configured = (_load_web_config().get("backend") or "").lower().strip()
+    known = ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai")
+    if configured in known:
+        return _is_backend_available(configured)
+    # No recognized explicit choice: any backend that is available will do.
+    return any(_is_backend_available(backend) for backend in known)


 def check_auxiliary_model() -> bool:
@ -1550,7 +1358,7 @@ registry.register(
    toolset="web",
    schema=WEB_SEARCH_SCHEMA,
    handler=lambda args, **kw: web_search_tool(args.get("query", ""), limit=args.get("limit", 5)),
-    check_fn=web_tools_registered,
+    check_fn=check_web_api_key,
    requires_env=_web_requires_env(),
    emoji="🔍",
    max_result_size_chars=100_000,
@ -1561,7 +1369,7 @@ registry.register(
    schema=WEB_EXTRACT_SCHEMA,
    handler=lambda args, **kw: web_extract_tool(
        args.get("urls", [])[:5] if isinstance(args.get("urls"), list) else [], "markdown"),
-    check_fn=web_tools_registered,
+    check_fn=check_web_api_key,
    requires_env=_web_requires_env(),
    is_async=True,
    emoji="📄",
--- a/uv.lock
+++ b/uv.lock
@ -1654,7 +1654,7 @@ requires-dist = [
    { name = "numpy", marker = "extra == 'voice'", specifier = "==2.4.3" },
    { name = "openai", specifier = "==2.24.0" },
    { name = "packaging", specifier = "==26.0" },
-    { name = "parallel-web", marker = "extra == 'parallel-web'", specifier = "==0.6.0" },
+    { name = "parallel-web", marker = "extra == 'parallel-web'", specifier = "==0.4.2" },
    { name = "pathspec", specifier = "==1.1.1" },
    { name = "pillow", specifier = "==12.2.0" },
    { name = "prompt-toolkit", specifier = "==3.0.52" },
@ -2690,7 +2690,7 @@ wheels = [

 [[package]]
 name = "parallel-web"
-version = "0.6.0"
+version = "0.4.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "anyio" },
@ -2700,9 +2700,9 @@ dependencies = [
    { name = "sniffio" },
    { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/7f/81/101c961fe6665212df01fb39a70ebb379dc33529c7bc9210675c0f525139/parallel_web-0.6.0.tar.gz", hash = "sha256:f8aecd3f1958090090c4516881cefea4f55c40948ba3bb99217ca9a6d4263225", size = 173149, upload-time = "2026-05-06T19:13:09.782Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/24/50/fb9b28a679e01682006b5259abff96de3d16e114e9447a7793fec31715de/parallel_web-0.4.2.tar.gz", hash = "sha256:599b5a8f387dc35c7dc8c81e372eadf6958a40acacea58bf170dfc663c003da7", size = 140026, upload-time = "2026-03-09T22:24:35.448Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a2/7c/7e8b63a0e90efaf567a818fca86c6ad3a85711f8995d2657b51b0cae2351/parallel_web-0.6.0-py3-none-any.whl", hash = "sha256:dc5342ef7262bd2e9f85eb7eace32833bd3d7e3af0bf5fbd780d1ea8c8d9ceb0", size = 199217, upload-time = "2026-05-06T19:13:08.316Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/3e/2218fa29637781b8e7ac35a928108ff2614ddd40879389d3af2caa725af5/parallel_web-0.4.2-py3-none-any.whl", hash = "sha256:aa3a4a9aecc08972c5ce9303271d4917903373dff4dd277d9a3e30f9cff53346", size = 144012, upload-time = "2026-03-09T22:24:33.979Z" },
 ]

 [[package]]