fix(web): preserve top-level error envelope on unconfigured systems

Surfaced by local E2E behavior-parity testing of PR vs origin/main: the
plugin-migrated dispatchers were quietly changing the error envelope
shape returned to function-calling models on unconfigured systems.

Two findings, both from per-result error wrapping bleeding into the
pre-flight configuration error path:

1. **search**: ``firecrawl.search()`` caught the
   ``ValueError("Web tools are not configured...")`` from
   ``_get_firecrawl_client()`` and returned it as
   ``{"success": False, "error": ...}``, losing the legacy
   ``{"error": "Error searching web: ..."}`` envelope that
   ``tool_error()`` emits on main. Models that special-case the
   ``error`` key still detect the failure, but the prefix is part of
   the legacy contract some users rely on.

2. **crawl**: ``firecrawl.crawl()`` caught the same pre-flight
   ``ValueError`` and wrapped it as a per-page error inside
   ``results[0]``. Main short-circuits on ``check_firecrawl_api_key()``
   BEFORE dispatching, so its unconfigured response is
   ``{"success": False, "error": "web_crawl requires Firecrawl..."}``
   at the top level. The PR's per-page burying hid the failure inside
   ``results[]`` where models that check ``result.get("error")`` would
   miss it.

Fix:
- ``plugins/web/firecrawl/provider.py``: pull
  ``_get_firecrawl_client()`` outside the broad ``try`` in
  ``search()``. Pre-flight ``ValueError`` / ``ImportError`` propagate
  to the dispatcher's top-level exception handler. In-flight SDK
  errors still get wrapped as ``{"success": False, ...}``.
- ``tools/web_tools.py``: mirror main's upstream availability gate in
  ``web_crawl_tool``. When the resolved crawl provider is
  ``is_available()==False``, short-circuit BEFORE dispatching with the
  same top-level error shape main emits.
- ``tests/tools/test_web_providers.py``: 2 regression tests
  (``TestUnconfiguredErrorEnvelopeParity``) lock in the behavior so
  future plugin work can't undo this.

Verified via local subprocess-based parity test (14/14 scenarios match
origin/main shape exactly) and full 210/210 web test suite green.
This commit is contained in:
kshitijk4poor 2026-05-14 02:06:45 +05:30 committed by Teknium
parent 657e6d87cc
commit 4ca5e72444
3 changed files with 105 additions and 11 deletions

View file

@ -390,22 +390,28 @@ class FirecrawlWebSearchProvider(WebSearchProvider):
Sync; matches the legacy ``_get_firecrawl_client().search(...)``
call directly. Normalizes the response across SDK/direct/gateway
shapes via :func:`_extract_web_search_results`.
Pre-flight errors (``ValueError`` from configuration check,
``ImportError`` from missing SDK) propagate to the dispatcher's
top-level handler, which wraps them as ``tool_error(...)``
matching the legacy ``{"error": "Error searching web: ..."}``
envelope. Only in-flight errors are caught and surfaced as
``{"success": False, "error": ...}``.
"""
from tools.interrupt import is_interrupted
if is_interrupted():
return {"success": False, "error": "Interrupted"}
logger.info("Firecrawl search: '%s' (limit=%d)", query, limit)
# _get_firecrawl_client() raises ValueError on unconfigured systems —
# let it propagate so the dispatcher emits the legacy envelope shape.
client = _get_firecrawl_client()
try:
from tools.interrupt import is_interrupted
if is_interrupted():
return {"success": False, "error": "Interrupted"}
logger.info("Firecrawl search: '%s' (limit=%d)", query, limit)
response = _get_firecrawl_client().search(query=query, limit=limit)
response = client.search(query=query, limit=limit)
web_results = _extract_web_search_results(response)
logger.info("Firecrawl: found %d search results", len(web_results))
return {"success": True, "data": {"web": web_results}}
except ValueError as exc:
return {"success": False, "error": str(exc)}
except ImportError as exc:
return {"success": False, "error": f"Firecrawl SDK not installed: {exc}"}
except Exception as exc: # noqa: BLE001
logger.warning("Firecrawl search error: %s", exc)
return {"success": False, "error": f"Firecrawl search failed: {exc}"}

View file

@ -263,3 +263,72 @@ class TestWebSearchUsesSearchBackend:
assert len(called_with) > 0
assert called_with[0][0] == "search"
class TestUnconfiguredErrorEnvelopeParity:
"""Regression tests for PR #25182: the post-migration dispatcher must
emit the same top-level error envelope as pre-migration main when no
web backend is configured.
Plugin-level error wrapping is correct for in-flight errors (per-page
SDK exceptions, scrape timeouts) but PRE-FLIGHT configuration errors
must surface at the top level so function-calling models that check
``result.get("error")`` detect the failure cleanly.
"""
def _clear_web_creds(self, monkeypatch):
for k in (
"BRAVE_SEARCH_API_KEY",
"SEARXNG_URL",
"TAVILY_API_KEY",
"EXA_API_KEY",
"PARALLEL_API_KEY",
"FIRECRAWL_API_KEY",
"FIRECRAWL_API_URL",
"FIRECRAWL_GATEWAY_URL",
"TOOL_GATEWAY_DOMAIN",
):
monkeypatch.delenv(k, raising=False)
def test_unconfigured_search_emits_top_level_error(self, monkeypatch):
"""``web_search_tool`` with no creds returns ``{"error": "Error searching web: ..."}``
matching main's ``tool_error()`` envelope, not a per-result shape.
"""
import json
from tools import web_tools
self._clear_web_creds(monkeypatch)
# Reset firecrawl client cache so the unconfigured state is re-evaluated
monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
result = json.loads(web_tools.web_search_tool("hello world", limit=3))
assert "error" in result, f"expected top-level 'error' key, got {result}"
# ``Error searching web:`` prefix comes from web_tools' top-level except handler
assert "Error searching web:" in result["error"]
assert "FIRECRAWL_API_KEY" in result["error"]
# No per-result burying
assert "results" not in result
def test_unconfigured_crawl_emits_top_level_error(self, monkeypatch):
"""``web_crawl_tool`` with no creds returns ``{"success": False, "error": "web_crawl requires Firecrawl..."}``
the dispatcher gates on ``provider.is_available()`` BEFORE
delegating to the plugin so pre-config errors don't get wrapped
into ``results[]``.
"""
import asyncio
import json
from tools import web_tools
self._clear_web_creds(monkeypatch)
monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
result = json.loads(asyncio.run(web_tools.web_crawl_tool("https://example.com", use_llm_processing=False)))
assert result.get("success") is False
assert "error" in result, f"expected top-level 'error' key, got {result}"
assert "web_crawl requires Firecrawl" in result["error"]
# Crucially: no per-page burying
assert "results" not in result

View file

@ -1192,6 +1192,25 @@ async def web_crawl_tool(
if crawl_provider is None:
crawl_provider = get_active_crawl_provider()
# Mirror main's upstream availability gate: when the resolved
# provider is configured-but-unavailable (e.g. firecrawl without
# FIRECRAWL_API_KEY), short-circuit BEFORE we dispatch so the
# error envelope matches the legacy top-level shape
# ``{"success": False, "error": "..."}`` rather than burying the
# configuration message inside a per-page ``results[]`` entry.
if crawl_provider is not None and not crawl_provider.is_available():
return json.dumps(
{
"success": False,
"error": (
"web_crawl requires Firecrawl. Set FIRECRAWL_API_KEY, "
f"FIRECRAWL_API_URL{_firecrawl_backend_help_suffix()}, "
"or use web_search + web_extract instead."
),
},
ensure_ascii=False,
)
if crawl_provider is not None:
# Ensure URL has protocol
if not url.startswith(('http://', 'https://')):