From 4ca5e724446a2294bbe69090884252eee326f2a7 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Thu, 14 May 2026 02:06:45 +0530 Subject: [PATCH] fix(web): preserve top-level error envelope on unconfigured systems Surfaced by local E2E behavior-parity testing of PR vs origin/main: the plugin-migrated dispatchers were quietly changing the error envelope shape returned to function-calling models on unconfigured systems. Two findings, both from per-result error wrapping bleeding into the pre-flight configuration error path: 1. **search**: ``firecrawl.search()`` caught the ``ValueError("Web tools are not configured...")`` from ``_get_firecrawl_client()`` and returned it as ``{"success": False, "error": ...}``, losing the legacy ``{"error": "Error searching web: ..."}`` envelope that ``tool_error()`` emits on main. Models that special-case the ``error`` key still detect the failure, but the prefix is part of the legacy contract some users rely on. 2. **crawl**: ``firecrawl.crawl()`` caught the same pre-flight ``ValueError`` and wrapped it as a per-page error inside ``results[0]``. Main short-circuits on ``check_firecrawl_api_key()`` BEFORE dispatching, so its unconfigured response is ``{"success": False, "error": "web_crawl requires Firecrawl..."}`` at the top level. The PR's per-page burying hid the failure inside ``results[]`` where models that check ``result.get("error")`` would miss it. Fix: - ``plugins/web/firecrawl/provider.py``: pull ``_get_firecrawl_client()`` outside the broad ``try`` in ``search()``. Pre-flight ``ValueError`` / ``ImportError`` propagate to the dispatcher's top-level exception handler. In-flight SDK errors still get wrapped as ``{"success": False, ...}``. - ``tools/web_tools.py``: mirror main's upstream availability gate in ``web_crawl_tool``. When the resolved crawl provider is ``is_available()==False``, short-circuit BEFORE dispatching with the same top-level error shape main emits. - ``tests/tools/test_web_providers.py``: 2 regression tests (``TestUnconfiguredErrorEnvelopeParity``) lock in the behavior so future plugin work can't undo this. Verified via local subprocess-based parity test (14/14 scenarios match origin/main shape exactly) and full 210/210 web test suite green. --- plugins/web/firecrawl/provider.py | 28 ++++++++----- tests/tools/test_web_providers.py | 69 +++++++++++++++++++++++++++++++ tools/web_tools.py | 19 +++++++++ 3 files changed, 105 insertions(+), 11 deletions(-) diff --git a/plugins/web/firecrawl/provider.py b/plugins/web/firecrawl/provider.py index e7d4d378bdc..bcc574ffca3 100644 --- a/plugins/web/firecrawl/provider.py +++ b/plugins/web/firecrawl/provider.py @@ -390,22 +390,28 @@ class FirecrawlWebSearchProvider(WebSearchProvider): Sync; matches the legacy ``_get_firecrawl_client().search(...)`` call directly. Normalizes the response across SDK/direct/gateway shapes via :func:`_extract_web_search_results`. + + Pre-flight errors (``ValueError`` from configuration check, + ``ImportError`` from missing SDK) propagate to the dispatcher's + top-level handler, which wraps them as ``tool_error(...)`` — + matching the legacy ``{"error": "Error searching web: ..."}`` + envelope. Only in-flight errors are caught and surfaced as + ``{"success": False, "error": ...}``. """ + from tools.interrupt import is_interrupted + + if is_interrupted(): + return {"success": False, "error": "Interrupted"} + + logger.info("Firecrawl search: '%s' (limit=%d)", query, limit) + # _get_firecrawl_client() raises ValueError on unconfigured systems — + # let it propagate so the dispatcher emits the legacy envelope shape. + client = _get_firecrawl_client() try: - from tools.interrupt import is_interrupted - - if is_interrupted(): - return {"success": False, "error": "Interrupted"} - - logger.info("Firecrawl search: '%s' (limit=%d)", query, limit) - response = _get_firecrawl_client().search(query=query, limit=limit) + response = client.search(query=query, limit=limit) web_results = _extract_web_search_results(response) logger.info("Firecrawl: found %d search results", len(web_results)) return {"success": True, "data": {"web": web_results}} - except ValueError as exc: - return {"success": False, "error": str(exc)} - except ImportError as exc: - return {"success": False, "error": f"Firecrawl SDK not installed: {exc}"} except Exception as exc: # noqa: BLE001 logger.warning("Firecrawl search error: %s", exc) return {"success": False, "error": f"Firecrawl search failed: {exc}"} diff --git a/tests/tools/test_web_providers.py b/tests/tools/test_web_providers.py index c64b0a1b621..67d39e9a999 100644 --- a/tests/tools/test_web_providers.py +++ b/tests/tools/test_web_providers.py @@ -263,3 +263,72 @@ class TestWebSearchUsesSearchBackend: assert len(called_with) > 0 assert called_with[0][0] == "search" + + +class TestUnconfiguredErrorEnvelopeParity: + """Regression tests for PR #25182: the post-migration dispatcher must + emit the same top-level error envelope as pre-migration main when no + web backend is configured. + + Plugin-level error wrapping is correct for in-flight errors (per-page + SDK exceptions, scrape timeouts) but PRE-FLIGHT configuration errors + must surface at the top level so function-calling models that check + ``result.get("error")`` detect the failure cleanly. + """ + + def _clear_web_creds(self, monkeypatch): + for k in ( + "BRAVE_SEARCH_API_KEY", + "SEARXNG_URL", + "TAVILY_API_KEY", + "EXA_API_KEY", + "PARALLEL_API_KEY", + "FIRECRAWL_API_KEY", + "FIRECRAWL_API_URL", + "FIRECRAWL_GATEWAY_URL", + "TOOL_GATEWAY_DOMAIN", + ): + monkeypatch.delenv(k, raising=False) + + def test_unconfigured_search_emits_top_level_error(self, monkeypatch): + """``web_search_tool`` with no creds returns ``{"error": "Error searching web: ..."}`` + — matching main's ``tool_error()`` envelope, not a per-result shape. + """ + import json + from tools import web_tools + + self._clear_web_creds(monkeypatch) + # Reset firecrawl client cache so the unconfigured state is re-evaluated + monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False) + monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False) + monkeypatch.setattr(web_tools, "_load_web_config", lambda: {}) + + result = json.loads(web_tools.web_search_tool("hello world", limit=3)) + assert "error" in result, f"expected top-level 'error' key, got {result}" + # ``Error searching web:`` prefix comes from web_tools' top-level except handler + assert "Error searching web:" in result["error"] + assert "FIRECRAWL_API_KEY" in result["error"] + # No per-result burying + assert "results" not in result + + def test_unconfigured_crawl_emits_top_level_error(self, monkeypatch): + """``web_crawl_tool`` with no creds returns ``{"success": False, "error": "web_crawl requires Firecrawl..."}`` + — the dispatcher gates on ``provider.is_available()`` BEFORE + delegating to the plugin so pre-config errors don't get wrapped + into ``results[]``. + """ + import asyncio + import json + from tools import web_tools + + self._clear_web_creds(monkeypatch) + monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False) + monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False) + monkeypatch.setattr(web_tools, "_load_web_config", lambda: {}) + + result = json.loads(asyncio.run(web_tools.web_crawl_tool("https://example.com", use_llm_processing=False))) + assert result.get("success") is False + assert "error" in result, f"expected top-level 'error' key, got {result}" + assert "web_crawl requires Firecrawl" in result["error"] + # Crucially: no per-page burying + assert "results" not in result diff --git a/tools/web_tools.py b/tools/web_tools.py index 1f0fd5fe117..e2743248d22 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -1192,6 +1192,25 @@ async def web_crawl_tool( if crawl_provider is None: crawl_provider = get_active_crawl_provider() + # Mirror main's upstream availability gate: when the resolved + # provider is configured-but-unavailable (e.g. firecrawl without + # FIRECRAWL_API_KEY), short-circuit BEFORE we dispatch so the + # error envelope matches the legacy top-level shape + # ``{"success": False, "error": "..."}`` rather than burying the + # configuration message inside a per-page ``results[]`` entry. + if crawl_provider is not None and not crawl_provider.is_available(): + return json.dumps( + { + "success": False, + "error": ( + "web_crawl requires Firecrawl. Set FIRECRAWL_API_KEY, " + f"FIRECRAWL_API_URL{_firecrawl_backend_help_suffix()}, " + "or use web_search + web_extract instead." + ), + }, + ensure_ascii=False, + ) + if crawl_provider is not None: # Ensure URL has protocol if not url.startswith(('http://', 'https://')):