mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(web): preserve top-level error envelope on unconfigured systems
Surfaced by local E2E behavior-parity testing of PR vs origin/main: the
plugin-migrated dispatchers were quietly changing the error envelope
shape returned to function-calling models on unconfigured systems.
Two findings, both from per-result error wrapping bleeding into the
pre-flight configuration error path:
1. **search**: ``firecrawl.search()`` caught the
``ValueError("Web tools are not configured...")`` from
``_get_firecrawl_client()`` and returned it as
``{"success": False, "error": ...}``, losing the legacy
``{"error": "Error searching web: ..."}`` envelope that
``tool_error()`` emits on main. Models that special-case the
``error`` key still detect the failure, but the prefix is part of
the legacy contract some users rely on.
2. **crawl**: ``firecrawl.crawl()`` caught the same pre-flight
``ValueError`` and wrapped it as a per-page error inside
``results[0]``. Main short-circuits on ``check_firecrawl_api_key()``
BEFORE dispatching, so its unconfigured response is
``{"success": False, "error": "web_crawl requires Firecrawl..."}``
at the top level. The PR's per-page burying hid the failure inside
``results[]`` where models that check ``result.get("error")`` would
miss it.
Fix:
- ``plugins/web/firecrawl/provider.py``: pull
``_get_firecrawl_client()`` outside the broad ``try`` in
``search()``. Pre-flight ``ValueError`` / ``ImportError`` propagate
to the dispatcher's top-level exception handler. In-flight SDK
errors still get wrapped as ``{"success": False, ...}``.
- ``tools/web_tools.py``: mirror main's upstream availability gate in
``web_crawl_tool``. When the resolved crawl provider is
``is_available()==False``, short-circuit BEFORE dispatching with the
same top-level error shape main emits.
- ``tests/tools/test_web_providers.py``: 2 regression tests
(``TestUnconfiguredErrorEnvelopeParity``) lock in the behavior so
future plugin work can't undo this.
Verified via local subprocess-based parity test (14/14 scenarios match
origin/main shape exactly) and full 210/210 web test suite green.
This commit is contained in:
parent
657e6d87cc
commit
4ca5e72444
3 changed files with 105 additions and 11 deletions
|
|
@ -390,22 +390,28 @@ class FirecrawlWebSearchProvider(WebSearchProvider):
|
|||
Sync; matches the legacy ``_get_firecrawl_client().search(...)``
|
||||
call directly. Normalizes the response across SDK/direct/gateway
|
||||
shapes via :func:`_extract_web_search_results`.
|
||||
|
||||
Pre-flight errors (``ValueError`` from configuration check,
|
||||
``ImportError`` from missing SDK) propagate to the dispatcher's
|
||||
top-level handler, which wraps them as ``tool_error(...)`` —
|
||||
matching the legacy ``{"error": "Error searching web: ..."}``
|
||||
envelope. Only in-flight errors are caught and surfaced as
|
||||
``{"success": False, "error": ...}``.
|
||||
"""
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
if is_interrupted():
|
||||
return {"success": False, "error": "Interrupted"}
|
||||
|
||||
logger.info("Firecrawl search: '%s' (limit=%d)", query, limit)
|
||||
# _get_firecrawl_client() raises ValueError on unconfigured systems —
|
||||
# let it propagate so the dispatcher emits the legacy envelope shape.
|
||||
client = _get_firecrawl_client()
|
||||
try:
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
if is_interrupted():
|
||||
return {"success": False, "error": "Interrupted"}
|
||||
|
||||
logger.info("Firecrawl search: '%s' (limit=%d)", query, limit)
|
||||
response = _get_firecrawl_client().search(query=query, limit=limit)
|
||||
response = client.search(query=query, limit=limit)
|
||||
web_results = _extract_web_search_results(response)
|
||||
logger.info("Firecrawl: found %d search results", len(web_results))
|
||||
return {"success": True, "data": {"web": web_results}}
|
||||
except ValueError as exc:
|
||||
return {"success": False, "error": str(exc)}
|
||||
except ImportError as exc:
|
||||
return {"success": False, "error": f"Firecrawl SDK not installed: {exc}"}
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning("Firecrawl search error: %s", exc)
|
||||
return {"success": False, "error": f"Firecrawl search failed: {exc}"}
|
||||
|
|
|
|||
|
|
@ -263,3 +263,72 @@ class TestWebSearchUsesSearchBackend:
|
|||
|
||||
assert len(called_with) > 0
|
||||
assert called_with[0][0] == "search"
|
||||
|
||||
|
||||
class TestUnconfiguredErrorEnvelopeParity:
|
||||
"""Regression tests for PR #25182: the post-migration dispatcher must
|
||||
emit the same top-level error envelope as pre-migration main when no
|
||||
web backend is configured.
|
||||
|
||||
Plugin-level error wrapping is correct for in-flight errors (per-page
|
||||
SDK exceptions, scrape timeouts) but PRE-FLIGHT configuration errors
|
||||
must surface at the top level so function-calling models that check
|
||||
``result.get("error")`` detect the failure cleanly.
|
||||
"""
|
||||
|
||||
def _clear_web_creds(self, monkeypatch):
|
||||
for k in (
|
||||
"BRAVE_SEARCH_API_KEY",
|
||||
"SEARXNG_URL",
|
||||
"TAVILY_API_KEY",
|
||||
"EXA_API_KEY",
|
||||
"PARALLEL_API_KEY",
|
||||
"FIRECRAWL_API_KEY",
|
||||
"FIRECRAWL_API_URL",
|
||||
"FIRECRAWL_GATEWAY_URL",
|
||||
"TOOL_GATEWAY_DOMAIN",
|
||||
):
|
||||
monkeypatch.delenv(k, raising=False)
|
||||
|
||||
def test_unconfigured_search_emits_top_level_error(self, monkeypatch):
|
||||
"""``web_search_tool`` with no creds returns ``{"error": "Error searching web: ..."}``
|
||||
— matching main's ``tool_error()`` envelope, not a per-result shape.
|
||||
"""
|
||||
import json
|
||||
from tools import web_tools
|
||||
|
||||
self._clear_web_creds(monkeypatch)
|
||||
# Reset firecrawl client cache so the unconfigured state is re-evaluated
|
||||
monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
|
||||
monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
|
||||
|
||||
result = json.loads(web_tools.web_search_tool("hello world", limit=3))
|
||||
assert "error" in result, f"expected top-level 'error' key, got {result}"
|
||||
# ``Error searching web:`` prefix comes from web_tools' top-level except handler
|
||||
assert "Error searching web:" in result["error"]
|
||||
assert "FIRECRAWL_API_KEY" in result["error"]
|
||||
# No per-result burying
|
||||
assert "results" not in result
|
||||
|
||||
def test_unconfigured_crawl_emits_top_level_error(self, monkeypatch):
|
||||
"""``web_crawl_tool`` with no creds returns ``{"success": False, "error": "web_crawl requires Firecrawl..."}``
|
||||
— the dispatcher gates on ``provider.is_available()`` BEFORE
|
||||
delegating to the plugin so pre-config errors don't get wrapped
|
||||
into ``results[]``.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
from tools import web_tools
|
||||
|
||||
self._clear_web_creds(monkeypatch)
|
||||
monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
|
||||
monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
|
||||
|
||||
result = json.loads(asyncio.run(web_tools.web_crawl_tool("https://example.com", use_llm_processing=False)))
|
||||
assert result.get("success") is False
|
||||
assert "error" in result, f"expected top-level 'error' key, got {result}"
|
||||
assert "web_crawl requires Firecrawl" in result["error"]
|
||||
# Crucially: no per-page burying
|
||||
assert "results" not in result
|
||||
|
|
|
|||
|
|
@ -1192,6 +1192,25 @@ async def web_crawl_tool(
|
|||
if crawl_provider is None:
|
||||
crawl_provider = get_active_crawl_provider()
|
||||
|
||||
# Mirror main's upstream availability gate: when the resolved
|
||||
# provider is configured-but-unavailable (e.g. firecrawl without
|
||||
# FIRECRAWL_API_KEY), short-circuit BEFORE we dispatch so the
|
||||
# error envelope matches the legacy top-level shape
|
||||
# ``{"success": False, "error": "..."}`` rather than burying the
|
||||
# configuration message inside a per-page ``results[]`` entry.
|
||||
if crawl_provider is not None and not crawl_provider.is_available():
|
||||
return json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"error": (
|
||||
"web_crawl requires Firecrawl. Set FIRECRAWL_API_KEY, "
|
||||
f"FIRECRAWL_API_URL{_firecrawl_backend_help_suffix()}, "
|
||||
"or use web_search + web_extract instead."
|
||||
),
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
if crawl_provider is not None:
|
||||
# Ensure URL has protocol
|
||||
if not url.startswith(('http://', 'https://')):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue