mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Consolidate with lessons from #4372
Pulls in the useful surface area from @vitobotta's parallel PR #4372 and differentiates on the search_lang default and test coverage. Credit: the tools_config.py integration, extra_snippets idea, and BRAVE_API_URL override are all adapted from #4372 (@vitobotta). Changes: - hermes_cli/tools_config.py: add Brave Search entry so it shows up in the `hermes tools` setup wizard alongside the other backends. - _brave_search: honour BRAVE_API_URL env var (useful for proxies or self-hosted gateways); read at call time, not import time. - _normalize_brave_search_results: merge up to two extra_snippets into the description so callers get the richer context Brave provides without changing the standard output shape. - Intentionally do NOT set `search_lang`. Brave auto-detects the query language, which is what we want for non-English users — #4372 hardcoded `search_lang: "en"` and would downgrade French/other-locale searches. Added a regression test. - Docstring explains the search_lang choice and BRAVE_API_URL escape hatch. Tests added (5 more, total 20): - test_does_not_set_search_lang — regression guard - test_brave_api_url_override — env-based base URL redirection - test_extra_snippets_merged_into_description - test_extra_snippets_used_when_description_empty - test_no_extra_snippets All 20 Brave tests pass; 60 existing Tavily/config tests still pass.
This commit is contained in:
parent
0e3908f4af
commit
4ba5129fdc
3 changed files with 104 additions and 6 deletions
|
|
@ -235,6 +235,15 @@ TOOL_CATEGORIES = {
|
|||
{"key": "TAVILY_API_KEY", "prompt": "Tavily API key", "url": "https://app.tavily.com/home"},
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "Brave Search",
|
||||
"badge": "free tier",
|
||||
"tag": "Independent index, privacy-focused — 2000 free queries/mo; search only (extract/crawl need Firecrawl)",
|
||||
"web_backend": "brave",
|
||||
"env_vars": [
|
||||
{"key": "BRAVE_API_KEY", "prompt": "Brave Search API key", "url": "https://api-dashboard.search.brave.com/"},
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "Firecrawl Self-Hosted",
|
||||
"badge": "free · self-hosted",
|
||||
|
|
|
|||
|
|
@ -87,6 +87,36 @@ class TestBraveSearchRequest:
|
|||
with pytest.raises(_httpx.HTTPStatusError):
|
||||
_brave_search("q")
|
||||
|
||||
def test_does_not_set_search_lang(self):
    """Guard against pinning ``search_lang`` on Brave requests.

    Brave auto-detects the query language, which gives better results
    for non-English queries. An earlier approach hardcoded
    ``search_lang: "en"``; this test keeps that from coming back.
    """
    fake_reply = MagicMock()
    fake_reply.raise_for_status = MagicMock()
    fake_reply.json.return_value = {"web": {"results": []}}

    with patch.dict(os.environ, {"BRAVE_API_KEY": "k"}), \
            patch("tools.web_tools.httpx.get", return_value=fake_reply) as fake_get:
        from tools.web_tools import _brave_search

        _brave_search("recette de pain au miel", limit=3)

    # The mock keeps its call record after the patch has been undone.
    sent_params = fake_get.call_args.kwargs.get("params") or {}
    assert "search_lang" not in sent_params
|
||||
|
||||
def test_brave_api_url_override(self):
    """Setting ``BRAVE_API_URL`` sends the request to that base URL.

    The override used here ends with a slash; the expected request URL
    contains exactly one slash before ``web/search``, so both
    ``https://proxy/`` and ``https://proxy`` style values work.
    """
    fake_reply = MagicMock()
    fake_reply.raise_for_status = MagicMock()
    fake_reply.json.return_value = {"web": {"results": []}}

    env = {"BRAVE_API_KEY": "k", "BRAVE_API_URL": "https://brave.proxy.internal/v1/"}
    with patch.dict(os.environ, env), \
            patch("tools.web_tools.httpx.get", return_value=fake_reply) as fake_get:
        from tools.web_tools import _brave_search

        _brave_search("q")

    # The URL is the first positional argument of the mocked httpx.get call.
    requested_url = fake_get.call_args.args[0]
    assert requested_url == "https://brave.proxy.internal/v1/web/search"
|
||||
|
||||
|
||||
# ─── _normalize_brave_search_results ─────────────────────────────────────────
|
||||
|
||||
|
|
@ -142,6 +172,43 @@ class TestNormalizeBraveSearchResults:
|
|||
assert web[0]["description"] == ""
|
||||
assert web[0]["position"] == 1
|
||||
|
||||
def test_extra_snippets_merged_into_description(self):
    """The first two ``extra_snippets`` are folded into the description.

    Callers get the additional page context without needing to know
    about the Brave-specific field; a third snippet is left out.
    """
    from tools.web_tools import _normalize_brave_search_results

    entry = {
        "title": "T",
        "url": "https://x",
        "description": "Main description.",
        "extra_snippets": ["First extra.", "Second extra.", "Third dropped."],
    }
    normalized = _normalize_brave_search_results({"web": {"results": [entry]}})
    merged = normalized["data"]["web"][0]["description"]

    for expected in ("Main description.", "First extra.", "Second extra."):
        assert expected in merged
    # Only the first two snippets are merged.
    assert "Third dropped." not in merged
|
||||
|
||||
def test_extra_snippets_used_when_description_empty(self):
    """With an empty main description, the snippets alone form the description."""
    from tools.web_tools import _normalize_brave_search_results

    entry = {
        "title": "T",
        "url": "https://x",
        "description": "",
        "extra_snippets": ["Only snippet."],
    }
    normalized = _normalize_brave_search_results({"web": {"results": [entry]}})
    first_hit = normalized["data"]["web"][0]
    assert first_hit["description"] == "Only snippet."
|
||||
|
||||
def test_no_extra_snippets(self):
    """Without ``extra_snippets`` the description passes through as-is (no trailing space)."""
    from tools.web_tools import _normalize_brave_search_results

    entry = {"title": "T", "url": "https://x", "description": "Just main."}
    normalized = _normalize_brave_search_results({"web": {"results": [entry]}})
    first_hit = normalized["data"]["web"][0]
    assert first_hit["description"] == "Just main."
|
||||
|
||||
|
||||
# ─── Backend detection ───────────────────────────────────────────────────────
|
||||
|
||||
|
|
|
|||
|
|
@ -368,7 +368,16 @@ def _normalize_tavily_documents(response: dict, fallback_url: str = "") -> List[
|
|||
|
||||
# ─── Brave Search Client ─────────────────────────────────────────────────────
|
||||
|
||||
_BRAVE_BASE_URL = "https://api.search.brave.com/res/v1"
|
||||
_BRAVE_DEFAULT_BASE_URL = "https://api.search.brave.com/res/v1"


def _get_brave_base_url() -> str:
    """Return the Brave API base URL, honouring the ``BRAVE_API_URL`` override.

    The environment variable is read at call time (not at import time) so
    tests and runtime config changes take effect without reloading the
    module.

    Returns:
        The base URL without surrounding whitespace or trailing slashes.
        Falls back to ``_BRAVE_DEFAULT_BASE_URL`` when the override is
        unset, empty, whitespace-only, or consists solely of slashes.
    """
    # Normalise before choosing: a padded value (common with copy/pasted
    # .env entries) would otherwise put spaces into the request URL, and a
    # blank or "/"-only value would yield an empty base.
    override = (os.getenv("BRAVE_API_URL") or "").strip().rstrip("/")
    return override or _BRAVE_DEFAULT_BASE_URL
|
||||
|
||||
|
||||
def _brave_search(query: str, limit: int = 5) -> dict:
|
||||
|
|
@ -378,7 +387,12 @@ def _brave_search(query: str, limit: int = 5) -> dict:
|
|||
automatically when ``FIRECRAWL_API_KEY`` is configured, and returns a
|
||||
clear ``tool_error`` otherwise. ``web_crawl_tool`` is gated by the
|
||||
existing ``check_firecrawl_api_key()`` guard in that function.
|
||||
Auth is via the ``X-Subscription-Token`` header.
|
||||
|
||||
Auth is via the ``X-Subscription-Token`` header. ``search_lang`` is
|
||||
intentionally *not* set — Brave auto-detects the query language, which
|
||||
gives better results for non-English users than pinning to a single
|
||||
locale. Callers that need a specific locale can point ``BRAVE_API_URL``
|
||||
to a proxy that injects the parameter.
|
||||
"""
|
||||
api_key = os.getenv("BRAVE_API_KEY")
|
||||
if not api_key:
|
||||
|
|
@ -386,10 +400,11 @@ def _brave_search(query: str, limit: int = 5) -> dict:
|
|||
"BRAVE_API_KEY environment variable not set. "
|
||||
"Get your API key at https://api-dashboard.search.brave.com/"
|
||||
)
|
||||
url = f"{_BRAVE_BASE_URL}/web/search"
|
||||
url = f"{_get_brave_base_url()}/web/search"
|
||||
headers = {
|
||||
"X-Subscription-Token": api_key,
|
||||
"Accept": "application/json",
|
||||
"Accept-Encoding": "gzip",
|
||||
}
|
||||
params = {
|
||||
"q": query,
|
||||
|
|
@ -404,16 +419,23 @@ def _brave_search(query: str, limit: int = 5) -> dict:
|
|||
def _normalize_brave_search_results(response: dict) -> dict:
    """Normalize Brave /web/search response to the standard web search format.

    Brave returns ``{web: {results: [{title, url, description, extra_snippets, ...}]}}``.
    We map to ``{success, data: {web: [{title, url, description, position}]}}``
    and merge up to two ``extra_snippets`` into the description so the caller
    gets the richer context Brave provides without changing the output shape.

    Args:
        response: Decoded JSON body of a Brave web search response.

    Returns:
        ``{"success": True, "data": {"web": [...]}}`` where each entry has
        ``title``, ``url``, ``description`` and a 1-based ``position``.
        A missing or null ``web`` / ``results`` yields an empty list.
    """
    raw_results = (response.get("web") or {}).get("results") or []
    web_results = []
    for position, result in enumerate(raw_results, start=1):
        description = result.get("description") or ""

        snippets = result.get("extra_snippets") or []
        if not isinstance(snippets, (list, tuple)):
            # Anything other than a sequence of snippets is ignored.
            snippets = []
        # Drop blank / non-string entries *before* taking the first two, so a
        # blank leading snippet does not use up one of the two slots.
        usable = [s.strip() for s in snippets if isinstance(s, str) and s.strip()][:2]
        if usable:
            # The outer strip removes the leading space left by an empty description.
            description = " ".join([description.strip(), *usable]).strip()

        web_results.append({
            # ``or ""`` also covers an explicit JSON null, matching how
            # ``description`` is handled.
            "title": result.get("title") or "",
            "url": result.get("url") or "",
            "description": description,
            "position": position,
        })
    return {"success": True, "data": {"web": web_results}}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue