Address review: Firecrawl fallback for extract + unit tests

- web_extract_tool: when backend=brave and FIRECRAWL_API_KEY is
  configured, route extract through Firecrawl automatically instead of
  returning an error. Falls back to a clear tool_error only when no
  Firecrawl credentials are available.
- _brave_search docstring: describe the actual fallback behaviour
  (previous wording said extract fell back to Firecrawl but the code
  was returning an error unconditionally).
- tests/tools/test_web_tools_brave.py: 15 tests covering request
  construction (header auth, param clamping, HTTP error propagation),
  response normalization (empty web, null web, missing fields),
  backend detection, search dispatch, and both extract fallback paths.
This commit is contained in:
Tipiweb 2026-04-19 13:07:58 +02:00
parent 5aee5e0970
commit 0e3908f4af
2 changed files with 250 additions and 11 deletions

View file

@ -0,0 +1,231 @@
"""Tests for Brave Search web backend integration.
Coverage:
_brave_search() API key handling, header auth, query params, error propagation.
_normalize_brave_search_results() Brave standard search response mapping.
_is_backend_available() / check_web_api_key() backend detection.
web_search_tool Brave dispatch.
web_extract_tool Firecrawl fallback when backend=brave, clear error otherwise.
"""
import json
import os
import asyncio
import pytest
from unittest.mock import patch, MagicMock
# ─── _brave_search ───────────────────────────────────────────────────────────
class TestBraveSearchRequest:
"""Test suite for the _brave_search helper."""
def test_raises_without_api_key(self):
"""No BRAVE_API_KEY → ValueError with guidance."""
with patch.dict(os.environ, {}, clear=False):
os.environ.pop("BRAVE_API_KEY", None)
from tools.web_tools import _brave_search
with pytest.raises(ValueError, match="BRAVE_API_KEY"):
_brave_search("test")
def test_sends_subscription_token_header(self):
"""API key is sent via X-Subscription-Token header (not JSON body)."""
mock_response = MagicMock()
mock_response.json.return_value = {"web": {"results": []}}
mock_response.raise_for_status = MagicMock()
with patch.dict(os.environ, {"BRAVE_API_KEY": "brave-test-key"}):
with patch("tools.web_tools.httpx.get", return_value=mock_response) as mock_get:
from tools.web_tools import _brave_search
_brave_search("hello world", limit=3)
mock_get.assert_called_once()
call = mock_get.call_args
headers = call.kwargs.get("headers") or {}
params = call.kwargs.get("params") or {}
assert headers.get("X-Subscription-Token") == "brave-test-key"
assert headers.get("Accept") == "application/json"
assert params["q"] == "hello world"
assert params["count"] == 3
assert "api.search.brave.com/res/v1/web/search" in call.args[0]
def test_clamps_limit_to_brave_max(self):
"""Brave caps count at 20 — our code must clamp before sending."""
mock_response = MagicMock()
mock_response.json.return_value = {"web": {"results": []}}
mock_response.raise_for_status = MagicMock()
with patch.dict(os.environ, {"BRAVE_API_KEY": "k"}):
with patch("tools.web_tools.httpx.get", return_value=mock_response) as mock_get:
from tools.web_tools import _brave_search
_brave_search("q", limit=999)
assert mock_get.call_args.kwargs["params"]["count"] == 20
def test_clamps_limit_to_at_least_one(self):
"""Zero/negative limit should clamp to 1 (Brave rejects count=0)."""
mock_response = MagicMock()
mock_response.json.return_value = {"web": {"results": []}}
mock_response.raise_for_status = MagicMock()
with patch.dict(os.environ, {"BRAVE_API_KEY": "k"}):
with patch("tools.web_tools.httpx.get", return_value=mock_response) as mock_get:
from tools.web_tools import _brave_search
_brave_search("q", limit=0)
assert mock_get.call_args.kwargs["params"]["count"] == 1
def test_raises_on_http_error(self):
"""Non-2xx responses propagate as httpx.HTTPStatusError."""
import httpx as _httpx
mock_response = MagicMock()
mock_response.raise_for_status.side_effect = _httpx.HTTPStatusError(
"429 Too Many Requests", request=MagicMock(), response=mock_response
)
with patch.dict(os.environ, {"BRAVE_API_KEY": "k"}):
with patch("tools.web_tools.httpx.get", return_value=mock_response):
from tools.web_tools import _brave_search
with pytest.raises(_httpx.HTTPStatusError):
_brave_search("q")
# ─── _normalize_brave_search_results ─────────────────────────────────────────
class TestNormalizeBraveSearchResults:
"""Test Brave response → standard web search format."""
def test_basic_normalization(self):
from tools.web_tools import _normalize_brave_search_results
raw = {
"web": {
"results": [
{"title": "Python Docs", "url": "https://docs.python.org", "description": "Official docs"},
{"title": "Tutorial", "url": "https://example.com", "description": "A tutorial"},
]
}
}
result = _normalize_brave_search_results(raw)
assert result["success"] is True
web = result["data"]["web"]
assert len(web) == 2
assert web[0]["title"] == "Python Docs"
assert web[0]["url"] == "https://docs.python.org"
assert web[0]["description"] == "Official docs"
assert web[0]["position"] == 1
assert web[1]["position"] == 2
def test_empty_results(self):
from tools.web_tools import _normalize_brave_search_results
result = _normalize_brave_search_results({"web": {"results": []}})
assert result["success"] is True
assert result["data"]["web"] == []
def test_missing_web_key(self):
"""Brave may omit the ``web`` key when no web results are returned."""
from tools.web_tools import _normalize_brave_search_results
result = _normalize_brave_search_results({})
assert result["success"] is True
assert result["data"]["web"] == []
def test_web_is_null(self):
"""Defensive: Brave returns ``web: null`` in some edge cases."""
from tools.web_tools import _normalize_brave_search_results
result = _normalize_brave_search_results({"web": None})
assert result["success"] is True
assert result["data"]["web"] == []
def test_missing_fields(self):
from tools.web_tools import _normalize_brave_search_results
result = _normalize_brave_search_results({"web": {"results": [{}]}})
web = result["data"]["web"]
assert web[0]["title"] == ""
assert web[0]["url"] == ""
assert web[0]["description"] == ""
assert web[0]["position"] == 1
# ─── Backend detection ───────────────────────────────────────────────────────
class TestBraveBackendDetection:
"""Brave recognised by _is_backend_available / _get_backend / check_web_api_key."""
def test_is_backend_available_brave(self):
from tools.web_tools import _is_backend_available
with patch.dict(os.environ, {"BRAVE_API_KEY": "k"}):
assert _is_backend_available("brave") is True
with patch.dict(os.environ, {}, clear=False):
os.environ.pop("BRAVE_API_KEY", None)
assert _is_backend_available("brave") is False
def test_get_backend_honours_configured_brave(self):
from tools.web_tools import _get_backend
with patch("tools.web_tools._load_web_config", return_value={"backend": "brave"}):
assert _get_backend() == "brave"
# ─── web_search_tool (Brave dispatch) ────────────────────────────────────────
class TestWebSearchBrave:
"""Test web_search_tool dispatch to Brave."""
def test_search_dispatches_to_brave(self):
mock_response = MagicMock()
mock_response.json.return_value = {
"web": {"results": [{"title": "Result", "url": "https://r.com", "description": "desc"}]}
}
mock_response.raise_for_status = MagicMock()
with patch("tools.web_tools._get_backend", return_value="brave"), \
patch.dict(os.environ, {"BRAVE_API_KEY": "k"}), \
patch("tools.web_tools.httpx.get", return_value=mock_response), \
patch("tools.interrupt.is_interrupted", return_value=False):
from tools.web_tools import web_search_tool
result = json.loads(web_search_tool("test query", limit=3))
assert result["success"] is True
assert len(result["data"]["web"]) == 1
assert result["data"]["web"][0]["title"] == "Result"
assert result["data"]["web"][0]["position"] == 1
# ─── web_extract_tool (Brave fallback behaviour) ─────────────────────────────
class TestWebExtractBraveFallback:
"""When backend is Brave, extract must fall back to Firecrawl or error."""
def test_extract_errors_when_brave_and_no_firecrawl(self):
"""No Firecrawl key → clear tool_error, not a cryptic crash."""
with patch("tools.web_tools._get_backend", return_value="brave"), \
patch("tools.web_tools.check_firecrawl_api_key", return_value=False), \
patch("tools.web_tools.is_safe_url", return_value=True), \
patch("tools.web_tools.check_website_access", return_value=None):
from tools.web_tools import web_extract_tool
result = json.loads(asyncio.get_event_loop().run_until_complete(
web_extract_tool(["https://example.com"], use_llm_processing=False)
))
assert result.get("success") is False
assert "Brave backend supports web_search only" in result.get("error", "")
assert "FIRECRAWL_API_KEY" in result.get("error", "")
def test_extract_falls_back_to_firecrawl_when_available(self):
"""Brave + Firecrawl → extract routes through Firecrawl seamlessly."""
fake_firecrawl_client = MagicMock()
fake_scrape_result = MagicMock()
fake_scrape_result.markdown = "Extracted markdown"
fake_scrape_result.html = "<p>Extracted</p>"
fake_scrape_result.metadata = {"title": "Example", "sourceURL": "https://example.com"}
fake_firecrawl_client.scrape.return_value = fake_scrape_result
with patch("tools.web_tools._get_backend", return_value="brave"), \
patch("tools.web_tools.check_firecrawl_api_key", return_value=True), \
patch("tools.web_tools._get_firecrawl_client", return_value=fake_firecrawl_client), \
patch("tools.web_tools.is_safe_url", return_value=True), \
patch("tools.web_tools.check_website_access", return_value=None), \
patch("tools.web_tools.process_content_with_llm", return_value=None):
from tools.web_tools import web_extract_tool
raw = asyncio.get_event_loop().run_until_complete(
web_extract_tool(["https://example.com"], use_llm_processing=False)
)
# Firecrawl path returns the Firecrawl-shape envelope; just
# assert it didn't short-circuit to the Brave error and that
# Firecrawl's scrape was actually invoked.
assert "Brave backend supports web_search only" not in raw
fake_firecrawl_client.scrape.assert_called()