mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-25 05:52:34 +00:00
fix(web): preserve top-level error envelope on unconfigured systems
Surfaced by local E2E behavior-parity testing of PR vs origin/main: the
plugin-migrated dispatchers were quietly changing the error envelope
shape returned to function-calling models on unconfigured systems.
Two findings, both from per-result error wrapping bleeding into the
pre-flight configuration error path:
1. **search**: ``firecrawl.search()`` caught the
``ValueError("Web tools are not configured...")`` from
``_get_firecrawl_client()`` and returned it as
``{"success": False, "error": ...}``, losing the legacy
``{"error": "Error searching web: ..."}`` envelope that
``tool_error()`` emits on main. Models that special-case the
``error`` key still detect the failure, but the prefix is part of
the legacy contract some users rely on.
2. **crawl**: ``firecrawl.crawl()`` caught the same pre-flight
``ValueError`` and wrapped it as a per-page error inside
``results[0]``. Main short-circuits on ``check_firecrawl_api_key()``
BEFORE dispatching, so its unconfigured response is
``{"success": False, "error": "web_crawl requires Firecrawl..."}``
at the top level. The PR's per-page burying hid the failure inside
``results[]`` where models that check ``result.get("error")`` would
miss it.
Fix:
- ``plugins/web/firecrawl/provider.py``: pull
``_get_firecrawl_client()`` outside the broad ``try`` in
``search()``. Pre-flight ``ValueError`` / ``ImportError`` propagate
to the dispatcher's top-level exception handler. In-flight SDK
errors still get wrapped as ``{"success": False, ...}``.
- ``tools/web_tools.py``: mirror main's upstream availability gate in
``web_crawl_tool``. When the resolved crawl provider is
``is_available()==False``, short-circuit BEFORE dispatching with the
same top-level error shape main emits.
- ``tests/tools/test_web_providers.py``: 2 regression tests
(``TestUnconfiguredErrorEnvelopeParity``) lock in the behavior so
future plugin work can't undo this.
Verified via local subprocess-based parity test (14/14 scenarios match
origin/main shape exactly) and full 210/210 web test suite green.
This commit is contained in:
parent
657e6d87cc
commit
4ca5e72444
3 changed files with 105 additions and 11 deletions
|
|
@ -390,22 +390,28 @@ class FirecrawlWebSearchProvider(WebSearchProvider):
|
||||||
Sync; matches the legacy ``_get_firecrawl_client().search(...)``
|
Sync; matches the legacy ``_get_firecrawl_client().search(...)``
|
||||||
call directly. Normalizes the response across SDK/direct/gateway
|
call directly. Normalizes the response across SDK/direct/gateway
|
||||||
shapes via :func:`_extract_web_search_results`.
|
shapes via :func:`_extract_web_search_results`.
|
||||||
|
|
||||||
|
Pre-flight errors (``ValueError`` from configuration check,
|
||||||
|
``ImportError`` from missing SDK) propagate to the dispatcher's
|
||||||
|
top-level handler, which wraps them as ``tool_error(...)`` —
|
||||||
|
matching the legacy ``{"error": "Error searching web: ..."}``
|
||||||
|
envelope. Only in-flight errors are caught and surfaced as
|
||||||
|
``{"success": False, "error": ...}``.
|
||||||
"""
|
"""
|
||||||
|
from tools.interrupt import is_interrupted
|
||||||
|
|
||||||
|
if is_interrupted():
|
||||||
|
return {"success": False, "error": "Interrupted"}
|
||||||
|
|
||||||
|
logger.info("Firecrawl search: '%s' (limit=%d)", query, limit)
|
||||||
|
# _get_firecrawl_client() raises ValueError on unconfigured systems —
|
||||||
|
# let it propagate so the dispatcher emits the legacy envelope shape.
|
||||||
|
client = _get_firecrawl_client()
|
||||||
try:
|
try:
|
||||||
from tools.interrupt import is_interrupted
|
response = client.search(query=query, limit=limit)
|
||||||
|
|
||||||
if is_interrupted():
|
|
||||||
return {"success": False, "error": "Interrupted"}
|
|
||||||
|
|
||||||
logger.info("Firecrawl search: '%s' (limit=%d)", query, limit)
|
|
||||||
response = _get_firecrawl_client().search(query=query, limit=limit)
|
|
||||||
web_results = _extract_web_search_results(response)
|
web_results = _extract_web_search_results(response)
|
||||||
logger.info("Firecrawl: found %d search results", len(web_results))
|
logger.info("Firecrawl: found %d search results", len(web_results))
|
||||||
return {"success": True, "data": {"web": web_results}}
|
return {"success": True, "data": {"web": web_results}}
|
||||||
except ValueError as exc:
|
|
||||||
return {"success": False, "error": str(exc)}
|
|
||||||
except ImportError as exc:
|
|
||||||
return {"success": False, "error": f"Firecrawl SDK not installed: {exc}"}
|
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc: # noqa: BLE001
|
||||||
logger.warning("Firecrawl search error: %s", exc)
|
logger.warning("Firecrawl search error: %s", exc)
|
||||||
return {"success": False, "error": f"Firecrawl search failed: {exc}"}
|
return {"success": False, "error": f"Firecrawl search failed: {exc}"}
|
||||||
|
|
|
||||||
|
|
@ -263,3 +263,72 @@ class TestWebSearchUsesSearchBackend:
|
||||||
|
|
||||||
assert len(called_with) > 0
|
assert len(called_with) > 0
|
||||||
assert called_with[0][0] == "search"
|
assert called_with[0][0] == "search"
|
||||||
|
|
||||||
|
|
||||||
|
class TestUnconfiguredErrorEnvelopeParity:
|
||||||
|
"""Regression tests for PR #25182: the post-migration dispatcher must
|
||||||
|
emit the same top-level error envelope as pre-migration main when no
|
||||||
|
web backend is configured.
|
||||||
|
|
||||||
|
Plugin-level error wrapping is correct for in-flight errors (per-page
|
||||||
|
SDK exceptions, scrape timeouts) but PRE-FLIGHT configuration errors
|
||||||
|
must surface at the top level so function-calling models that check
|
||||||
|
``result.get("error")`` detect the failure cleanly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _clear_web_creds(self, monkeypatch):
|
||||||
|
for k in (
|
||||||
|
"BRAVE_SEARCH_API_KEY",
|
||||||
|
"SEARXNG_URL",
|
||||||
|
"TAVILY_API_KEY",
|
||||||
|
"EXA_API_KEY",
|
||||||
|
"PARALLEL_API_KEY",
|
||||||
|
"FIRECRAWL_API_KEY",
|
||||||
|
"FIRECRAWL_API_URL",
|
||||||
|
"FIRECRAWL_GATEWAY_URL",
|
||||||
|
"TOOL_GATEWAY_DOMAIN",
|
||||||
|
):
|
||||||
|
monkeypatch.delenv(k, raising=False)
|
||||||
|
|
||||||
|
def test_unconfigured_search_emits_top_level_error(self, monkeypatch):
|
||||||
|
"""``web_search_tool`` with no creds returns ``{"error": "Error searching web: ..."}``
|
||||||
|
— matching main's ``tool_error()`` envelope, not a per-result shape.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
from tools import web_tools
|
||||||
|
|
||||||
|
self._clear_web_creds(monkeypatch)
|
||||||
|
# Reset firecrawl client cache so the unconfigured state is re-evaluated
|
||||||
|
monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
|
||||||
|
monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
|
||||||
|
|
||||||
|
result = json.loads(web_tools.web_search_tool("hello world", limit=3))
|
||||||
|
assert "error" in result, f"expected top-level 'error' key, got {result}"
|
||||||
|
# ``Error searching web:`` prefix comes from web_tools' top-level except handler
|
||||||
|
assert "Error searching web:" in result["error"]
|
||||||
|
assert "FIRECRAWL_API_KEY" in result["error"]
|
||||||
|
# No per-result burying
|
||||||
|
assert "results" not in result
|
||||||
|
|
||||||
|
def test_unconfigured_crawl_emits_top_level_error(self, monkeypatch):
|
||||||
|
"""``web_crawl_tool`` with no creds returns ``{"success": False, "error": "web_crawl requires Firecrawl..."}``
|
||||||
|
— the dispatcher gates on ``provider.is_available()`` BEFORE
|
||||||
|
delegating to the plugin so pre-config errors don't get wrapped
|
||||||
|
into ``results[]``.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from tools import web_tools
|
||||||
|
|
||||||
|
self._clear_web_creds(monkeypatch)
|
||||||
|
monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
|
||||||
|
monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
|
||||||
|
|
||||||
|
result = json.loads(asyncio.run(web_tools.web_crawl_tool("https://example.com", use_llm_processing=False)))
|
||||||
|
assert result.get("success") is False
|
||||||
|
assert "error" in result, f"expected top-level 'error' key, got {result}"
|
||||||
|
assert "web_crawl requires Firecrawl" in result["error"]
|
||||||
|
# Crucially: no per-page burying
|
||||||
|
assert "results" not in result
|
||||||
|
|
|
||||||
|
|
@ -1192,6 +1192,25 @@ async def web_crawl_tool(
|
||||||
if crawl_provider is None:
|
if crawl_provider is None:
|
||||||
crawl_provider = get_active_crawl_provider()
|
crawl_provider = get_active_crawl_provider()
|
||||||
|
|
||||||
|
# Mirror main's upstream availability gate: when the resolved
|
||||||
|
# provider is configured-but-unavailable (e.g. firecrawl without
|
||||||
|
# FIRECRAWL_API_KEY), short-circuit BEFORE we dispatch so the
|
||||||
|
# error envelope matches the legacy top-level shape
|
||||||
|
# ``{"success": False, "error": "..."}`` rather than burying the
|
||||||
|
# configuration message inside a per-page ``results[]`` entry.
|
||||||
|
if crawl_provider is not None and not crawl_provider.is_available():
|
||||||
|
return json.dumps(
|
||||||
|
{
|
||||||
|
"success": False,
|
||||||
|
"error": (
|
||||||
|
"web_crawl requires Firecrawl. Set FIRECRAWL_API_KEY, "
|
||||||
|
f"FIRECRAWL_API_URL{_firecrawl_backend_help_suffix()}, "
|
||||||
|
"or use web_search + web_extract instead."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
ensure_ascii=False,
|
||||||
|
)
|
||||||
|
|
||||||
if crawl_provider is not None:
|
if crawl_provider is not None:
|
||||||
# Ensure URL has protocol
|
# Ensure URL has protocol
|
||||||
if not url.startswith(('http://', 'https://')):
|
if not url.startswith(('http://', 'https://')):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue