def test_does_not_set_search_lang(self):
    """Regression guard: ``_brave_search`` must not pin ``search_lang``.

    Brave auto-detects the query language, which gives better results for
    non-English queries than a hardcoded ``search_lang: "en"`` (an earlier
    approach did exactly that).

    The test first proves the request was really issued and carried the
    query; without that, ``"search_lang" not in params`` would also pass
    when no request (or no ``params``) was sent at all.
    """
    mock_response = MagicMock()
    mock_response.json.return_value = {"web": {"results": []}}
    mock_response.raise_for_status = MagicMock()

    query = "recette de pain au miel"
    with patch.dict(os.environ, {"BRAVE_API_KEY": "k"}):
        with patch("tools.web_tools.httpx.get", return_value=mock_response) as mock_get:
            from tools.web_tools import _brave_search
            _brave_search(query, limit=3)

    # Guard against a vacuous pass: exactly one HTTP call, with our query.
    mock_get.assert_called_once()
    params = mock_get.call_args.kwargs.get("params") or {}
    assert params.get("q") == query
    assert "search_lang" not in params
Trailing slashes are + stripped so both ``https://proxy/`` and ``https://proxy`` work.""" + mock_response = MagicMock() + mock_response.json.return_value = {"web": {"results": []}} + mock_response.raise_for_status = MagicMock() + + with patch.dict(os.environ, {"BRAVE_API_KEY": "k", "BRAVE_API_URL": "https://brave.proxy.internal/v1/"}): + with patch("tools.web_tools.httpx.get", return_value=mock_response) as mock_get: + from tools.web_tools import _brave_search + _brave_search("q") + called_url = mock_get.call_args.args[0] + assert called_url == "https://brave.proxy.internal/v1/web/search" + # ─── _normalize_brave_search_results ───────────────────────────────────────── @@ -142,6 +172,43 @@ class TestNormalizeBraveSearchResults: assert web[0]["description"] == "" assert web[0]["position"] == 1 + def test_extra_snippets_merged_into_description(self): + """Brave's ``extra_snippets`` hold additional context from the page. + We merge the first two into the description so the caller sees + richer information without having to know about the Brave-specific + field.""" + from tools.web_tools import _normalize_brave_search_results + raw = {"web": {"results": [{ + "title": "T", "url": "https://x", "description": "Main description.", + "extra_snippets": ["First extra.", "Second extra.", "Third dropped."], + }]}} + result = _normalize_brave_search_results(raw) + desc = result["data"]["web"][0]["description"] + assert "Main description." in desc + assert "First extra." in desc + assert "Second extra." in desc + # Only first two are merged + assert "Third dropped." 
_BRAVE_DEFAULT_BASE_URL = "https://api.search.brave.com/res/v1"


def _get_brave_base_url() -> str:
    """Return the Brave API base URL, honouring the ``BRAVE_API_URL`` override.

    The override is read at call time (not at import time) so tests and
    runtime config changes take effect without reloading the module.

    Surrounding whitespace and trailing slashes are removed from the
    override, so ``https://proxy/``, ``https://proxy`` and a padded value
    from a ``.env`` file all produce the same base. If nothing usable is
    left (unset, empty, whitespace-only, or just ``/``), the default Brave
    endpoint is returned.

    Returns:
        The base URL without a trailing slash.
    """
    # Strip whitespace *before* slashes: "https://proxy/ " would otherwise
    # keep its trailing slash and yield "https://proxy/ /web/search".
    override = (os.getenv("BRAVE_API_URL") or "").strip().rstrip("/")
    return override or _BRAVE_DEFAULT_BASE_URL
def _normalize_brave_search_results(response: dict) -> dict:
    """Normalize Brave /web/search response to the standard web search format.

    Brave returns ``{web: {results: [{title, url, description, extra_snippets, ...}]}}``.
    We map to ``{success, data: {web: [{title, url, description, position}]}}``
    and merge up to two ``extra_snippets`` into the description so the caller
    gets the richer context Brave provides without changing the output shape.

    Args:
        response: Decoded JSON body of a Brave ``/web/search`` call. A missing
            or null ``web`` / ``results`` entry is treated as "no results".

    Returns:
        ``{"success": True, "data": {"web": [...]}}`` where each item has
        string ``title``, ``url`` and ``description`` fields and a 1-based
        ``position``.
    """
    raw_results = (response.get("web") or {}).get("results") or []
    web_results = []
    for position, result in enumerate(raw_results, start=1):
        # ``or ""`` (rather than a .get default) also covers JSON ``null``,
        # so every text field is always a string for downstream consumers.
        description = result.get("description") or ""

        extra = result.get("extra_snippets")
        if isinstance(extra, (list, tuple)):
            # Only the first two entries are considered; anything that is
            # not a non-empty string is ignored instead of breaking join().
            snippets = [s for s in extra[:2] if isinstance(s, str) and s]
            if snippets:
                joined_extra = " ".join(snippets)
                description = (
                    f"{description} {joined_extra}".strip()
                    if description
                    else joined_extra
                )

        web_results.append({
            "title": result.get("title") or "",
            "url": result.get("url") or "",
            "description": description,
            "position": position,
        })
    return {"success": True, "data": {"web": web_results}}