Consolidate with lessons from #4372

Pulls in the useful surface area from @vitobotta's parallel PR #4372
and differentiates on the search_lang default and test coverage.

Credit: the tools_config.py integration, extra_snippets idea, and
BRAVE_API_URL override are all adapted from #4372 (@vitobotta).

Changes:
- hermes_cli/tools_config.py: add Brave Search entry so it shows up in
  the `hermes tools` setup wizard alongside the other backends.
- _brave_search: honour BRAVE_API_URL env var (useful for proxies or
  self-hosted gateways); read at call time, not import time.
- _normalize_brave_search_results: merge up to two extra_snippets into
  the description so callers get the richer context Brave provides
  without changing the standard output shape.
- Intentionally do NOT set `search_lang`. Brave auto-detects the query
  language, which is what we want for non-English users — #4372
  hardcoded `search_lang: "en"` and would downgrade French/other-locale
  searches. Added a regression test.
- Docstring explains the search_lang choice and BRAVE_API_URL escape
  hatch.

Tests added (5 more, total 20):
- test_does_not_set_search_lang — regression guard
- test_brave_api_url_override — env-based base URL redirection
- test_extra_snippets_merged_into_description
- test_extra_snippets_used_when_description_empty
- test_no_extra_snippets

All 20 Brave tests pass; 60 existing Tavily/config tests still pass.
This commit is contained in:
Tipiweb 2026-04-19 18:11:11 +02:00
parent 0e3908f4af
commit 4ba5129fdc
3 changed files with 104 additions and 6 deletions

View file

@ -235,6 +235,15 @@ TOOL_CATEGORIES = {
{"key": "TAVILY_API_KEY", "prompt": "Tavily API key", "url": "https://app.tavily.com/home"},
],
},
{
"name": "Brave Search",
"badge": "free tier",
"tag": "Independent index, privacy-focused — 2000 free queries/mo; search only (extract/crawl need Firecrawl)",
"web_backend": "brave",
"env_vars": [
{"key": "BRAVE_API_KEY", "prompt": "Brave Search API key", "url": "https://api-dashboard.search.brave.com/"},
],
},
{
"name": "Firecrawl Self-Hosted",
"badge": "free · self-hosted",

View file

@ -87,6 +87,36 @@ class TestBraveSearchRequest:
with pytest.raises(_httpx.HTTPStatusError):
_brave_search("q")
def test_does_not_set_search_lang(self):
    """Regression guard: the outgoing request must not carry ``search_lang``.

    Brave auto-detects the query language, which serves non-English
    queries better than a pinned locale. An earlier approach hardcoded
    ``search_lang: "en"``; this test keeps that from coming back.
    """
    fake_response = MagicMock()
    fake_response.raise_for_status = MagicMock()
    fake_response.json.return_value = {"web": {"results": []}}

    env = {"BRAVE_API_KEY": "k"}
    with patch.dict(os.environ, env), patch(
        "tools.web_tools.httpx.get", return_value=fake_response
    ) as http_get:
        from tools.web_tools import _brave_search

        # A French query: exactly the case a pinned "en" would degrade.
        _brave_search("recette de pain au miel", limit=3)

    sent_params = http_get.call_args.kwargs.get("params")
    if not sent_params:
        sent_params = {}
    assert "search_lang" not in sent_params
def test_brave_api_url_override(self):
    """A ``BRAVE_API_URL`` value replaces the default API host.

    The override used here ends with a slash; the asserted URL shows that
    the trailing slash is dropped before ``/web/search`` is appended.
    """
    fake_response = MagicMock()
    fake_response.raise_for_status = MagicMock()
    fake_response.json.return_value = {"web": {"results": []}}

    env = {
        "BRAVE_API_KEY": "k",
        "BRAVE_API_URL": "https://brave.proxy.internal/v1/",
    }
    with patch.dict(os.environ, env), patch(
        "tools.web_tools.httpx.get", return_value=fake_response
    ) as http_get:
        from tools.web_tools import _brave_search

        _brave_search("q")

    # The request URL is the first positional argument of httpx.get.
    positional_args = http_get.call_args.args
    assert positional_args[0] == "https://brave.proxy.internal/v1/web/search"
# ─── _normalize_brave_search_results ─────────────────────────────────────────
@ -142,6 +172,43 @@ class TestNormalizeBraveSearchResults:
assert web[0]["description"] == ""
assert web[0]["position"] == 1
def test_extra_snippets_merged_into_description(self):
    """The first two ``extra_snippets`` are folded into the description.

    Callers get the additional page context through the standard
    ``description`` field and never need to know about the
    Brave-specific ``extra_snippets`` key.
    """
    from tools.web_tools import _normalize_brave_search_results

    entry = {
        "title": "T",
        "url": "https://x",
        "description": "Main description.",
        "extra_snippets": ["First extra.", "Second extra.", "Third dropped."],
    }
    normalized = _normalize_brave_search_results({"web": {"results": [entry]}})
    merged = normalized["data"]["web"][0]["description"]

    for expected in ("Main description.", "First extra.", "Second extra."):
        assert expected in merged
    # Snippets beyond the second are left out.
    assert "Third dropped." not in merged
def test_extra_snippets_used_when_description_empty(self):
    """With an empty main description, the snippets alone become the description."""
    from tools.web_tools import _normalize_brave_search_results

    entry = {
        "title": "T",
        "url": "https://x",
        "description": "",
        "extra_snippets": ["Only snippet."],
    }
    normalized = _normalize_brave_search_results({"web": {"results": [entry]}})
    first_hit = normalized["data"]["web"][0]
    assert first_hit["description"] == "Only snippet."
def test_no_extra_snippets(self):
    """Without ``extra_snippets`` the description passes through untouched (no trailing space)."""
    from tools.web_tools import _normalize_brave_search_results

    entry = {"title": "T", "url": "https://x", "description": "Just main."}
    normalized = _normalize_brave_search_results({"web": {"results": [entry]}})
    first_hit = normalized["data"]["web"][0]
    assert first_hit["description"] == "Just main."
# ─── Backend detection ───────────────────────────────────────────────────────

View file

@ -368,7 +368,16 @@ def _normalize_tavily_documents(response: dict, fallback_url: str = "") -> List[
# ─── Brave Search Client ─────────────────────────────────────────────────────
# Brave API root under the name used by the pre-override URL construction.
# NOTE(review): appears superseded by _BRAVE_DEFAULT_BASE_URL — confirm no
# remaining references before removing.
_BRAVE_BASE_URL = "https://api.search.brave.com/res/v1"
# Default Brave API root; _get_brave_base_url() falls back to this value when
# the BRAVE_API_URL environment variable is unset or empty.
_BRAVE_DEFAULT_BASE_URL = "https://api.search.brave.com/res/v1"
def _get_brave_base_url() -> str:
    """Return the Brave API base URL, honouring the ``BRAVE_API_URL`` override.

    The environment is consulted on every call (not at import time) so tests
    and runtime config changes take effect without reloading the module.

    Returns:
        The ``BRAVE_API_URL`` value with surrounding whitespace and trailing
        slashes removed, or ``_BRAVE_DEFAULT_BASE_URL`` (likewise without
        trailing slashes) when the override is unset, empty, or
        whitespace-only.
    """
    # Strip whitespace before anything else: a stray space or newline would
    # otherwise survive ``rstrip("/")`` and end up inside the request URL,
    # and a whitespace-only value would be mistaken for a real override.
    override = (os.getenv("BRAVE_API_URL") or "").strip()
    return (override or _BRAVE_DEFAULT_BASE_URL).rstrip("/")
def _brave_search(query: str, limit: int = 5) -> dict:
@ -378,7 +387,12 @@ def _brave_search(query: str, limit: int = 5) -> dict:
automatically when ``FIRECRAWL_API_KEY`` is configured, and returns a
clear ``tool_error`` otherwise. ``web_crawl_tool`` is gated by the
existing ``check_firecrawl_api_key()`` guard in that function.
Auth is via the ``X-Subscription-Token`` header.
Auth is via the ``X-Subscription-Token`` header. ``search_lang`` is
intentionally *not* set: Brave auto-detects the query language, which
gives better results for non-English users than pinning to a single
locale. Callers that need a specific locale can point ``BRAVE_API_URL``
at a proxy that injects the parameter.
"""
api_key = os.getenv("BRAVE_API_KEY")
if not api_key:
@ -386,10 +400,11 @@ def _brave_search(query: str, limit: int = 5) -> dict:
"BRAVE_API_KEY environment variable not set. "
"Get your API key at https://api-dashboard.search.brave.com/"
)
url = f"{_BRAVE_BASE_URL}/web/search"
url = f"{_get_brave_base_url()}/web/search"
headers = {
"X-Subscription-Token": api_key,
"Accept": "application/json",
"Accept-Encoding": "gzip",
}
params = {
"q": query,
@ -404,16 +419,23 @@ def _brave_search(query: str, limit: int = 5) -> dict:
def _normalize_brave_search_results(response: dict) -> dict:
    """Normalize a Brave ``/web/search`` response to the standard web search format.

    Brave returns ``{web: {results: [{title, url, description, extra_snippets, ...}]}}``.
    We map to ``{success, data: {web: [{title, url, description, position}]}}``
    and merge up to two ``extra_snippets`` into the description so the caller
    gets the richer context Brave provides without changing the output shape.

    Args:
        response: Decoded JSON body of the Brave response. A missing or
            null ``web`` / ``results`` entry is treated as "no results".

    Returns:
        ``{"success": True, "data": {"web": [...]}}`` where each entry has
        ``title``, ``url``, ``description`` and a 1-based ``position``.
    """
    raw_results = (response.get("web") or {}).get("results") or []
    web_results = []
    for position, result in enumerate(raw_results, start=1):
        description = result.get("description", "") or ""
        extra = result.get("extra_snippets") or []
        if extra:
            # Only the first two snippets are considered; empty ones are skipped.
            joined_extra = " ".join(s for s in extra[:2] if s)
            # With no main description the snippets stand alone, which avoids
            # a leading space in the merged text.
            description = f"{description} {joined_extra}".strip() if description else joined_extra
        web_results.append({
            "title": result.get("title", ""),
            "url": result.get("url", ""),
            "description": description,
            "position": position,
        })
    return {"success": True, "data": {"web": web_results}}