Consolidate with lessons from #4372

Pulls in the useful surface area from @vitobotta's parallel PR #4372;
this change differs from it in the search_lang default and in test coverage.

Credit: the tools_config.py integration, extra_snippets idea, and
BRAVE_API_URL override are all adapted from #4372 (@vitobotta).

Changes:
- hermes_cli/tools_config.py: add Brave Search entry so it shows up in
  the `hermes tools` setup wizard alongside the other backends.
- _brave_search: honour BRAVE_API_URL env var (useful for proxies or
  self-hosted gateways); read at call time, not import time.
- _normalize_brave_search_results: merge up to two extra_snippets into
  the description so callers get the richer context Brave provides
  without changing the standard output shape.
- Intentionally do NOT set `search_lang`. Brave auto-detects the query
  language, which is what we want for non-English users — #4372
  hardcoded `search_lang: "en"` and would downgrade French/other-locale
  searches. Added a regression test.
- Docstring explains the search_lang choice and BRAVE_API_URL escape
  hatch.

Tests added (5 more, total 20):
- test_does_not_set_search_lang — regression guard
- test_brave_api_url_override — env-based base URL redirection
- test_extra_snippets_merged_into_description
- test_extra_snippets_used_when_description_empty
- test_no_extra_snippets

All 20 Brave tests pass; 60 existing Tavily/config tests still pass.
This commit is contained in:
Tipiweb 2026-04-19 18:11:11 +02:00
parent 0e3908f4af
commit 4ba5129fdc
3 changed files with 104 additions and 6 deletions

View file

@ -87,6 +87,36 @@ class TestBraveSearchRequest:
with pytest.raises(_httpx.HTTPStatusError):
_brave_search("q")
def test_does_not_set_search_lang(self):
    """Regression guard: ``_brave_search`` must not send ``search_lang``.

    An earlier approach hardcoded ``search_lang: "en"``. Leaving the
    parameter out is meant to let Brave's auto-detection handle
    non-English queries, such as the French one used here.
    """
    mock_response = MagicMock()
    mock_response.json.return_value = {"web": {"results": []}}
    mock_response.raise_for_status = MagicMock()

    with patch.dict(os.environ, {"BRAVE_API_KEY": "k"}), patch(
        "tools.web_tools.httpx.get", return_value=mock_response
    ) as mock_get:
        from tools.web_tools import _brave_search

        _brave_search("recette de pain au miel", limit=3)

    # ``call_args`` is None on a mock that was never called; assert the call
    # happened first so a missing request fails with a clear message instead
    # of an AttributeError on ``None.kwargs``.
    mock_get.assert_called()
    params = mock_get.call_args.kwargs.get("params") or {}
    assert "search_lang" not in params
def test_brave_api_url_override(self):
    """``BRAVE_API_URL`` redirects the request to a custom host.

    Useful for proxies / self-hosted gateways. The override used here ends
    with a trailing slash, and the expected URL shows it joined to
    ``/web/search`` without a doubled slash.
    """
    mock_response = MagicMock()
    mock_response.json.return_value = {"web": {"results": []}}
    mock_response.raise_for_status = MagicMock()

    env = {
        "BRAVE_API_KEY": "k",
        "BRAVE_API_URL": "https://brave.proxy.internal/v1/",
    }
    with patch.dict(os.environ, env), patch(
        "tools.web_tools.httpx.get", return_value=mock_response
    ) as mock_get:
        from tools.web_tools import _brave_search

        _brave_search("q")

    # ``call_args`` is None on a mock that was never called; assert the call
    # happened first so a missing request fails with a clear message instead
    # of an AttributeError on ``None.args``.
    mock_get.assert_called()
    called_url = mock_get.call_args.args[0]
    assert called_url == "https://brave.proxy.internal/v1/web/search"
# ─── _normalize_brave_search_results ─────────────────────────────────────────
@ -142,6 +172,43 @@ class TestNormalizeBraveSearchResults:
assert web[0]["description"] == ""
assert web[0]["position"] == 1
def test_extra_snippets_merged_into_description(self):
    """The first two ``extra_snippets`` are folded into the description.

    Brave's ``extra_snippets`` carry additional context from the page.
    Merging them means callers get the richer text without needing to know
    about the Brave-specific field.
    """
    from tools.web_tools import _normalize_brave_search_results

    entry = {
        "title": "T",
        "url": "https://x",
        "description": "Main description.",
        "extra_snippets": ["First extra.", "Second extra.", "Third dropped."],
    }
    normalized = _normalize_brave_search_results({"web": {"results": [entry]}})
    merged = normalized["data"]["web"][0]["description"]

    for expected in ("Main description.", "First extra.", "Second extra."):
        assert expected in merged
    # Only the first two snippets are merged; the third must not appear.
    assert "Third dropped." not in merged
def test_extra_snippets_used_when_description_empty(self):
    """An empty main description falls back to the snippets alone."""
    from tools.web_tools import _normalize_brave_search_results

    entry = {
        "title": "T",
        "url": "https://x",
        "description": "",
        "extra_snippets": ["Only snippet."],
    }
    normalized = _normalize_brave_search_results({"web": {"results": [entry]}})
    first_hit = normalized["data"]["web"][0]
    assert first_hit["description"] == "Only snippet."
def test_no_extra_snippets(self):
    """Without ``extra_snippets`` the description passes through as-is.

    The exact-equality check also rules out a stray trailing space.
    """
    from tools.web_tools import _normalize_brave_search_results

    entry = {"title": "T", "url": "https://x", "description": "Just main."}
    normalized = _normalize_brave_search_results({"web": {"results": [entry]}})
    first_hit = normalized["data"]["web"][0]
    assert first_hit["description"] == "Just main."
# ─── Backend detection ───────────────────────────────────────────────────────