diff --git a/hermes_cli/config.py b/hermes_cli/config.py index cdb53fd080..65d85cd58b 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1864,6 +1864,14 @@ OPTIONAL_ENV_VARS = { "password": False, "category": "tool", }, + "BRAVE_SEARCH_API_KEY": { + "description": "Brave Search API subscription token (free tier: 2,000 queries/mo)", + "prompt": "Brave Search subscription token", + "url": "https://brave.com/search/api/", + "tools": ["web_search"], + "password": True, + "category": "tool", + }, "BROWSERBASE_API_KEY": { "description": "Browserbase API key for cloud browser (optional — local browser works without this)", "prompt": "Browserbase API key", diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index b258e15998..aa07e85e7a 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -308,6 +308,23 @@ TOOL_CATEGORIES = { {"key": "SEARXNG_URL", "prompt": "Your SearXNG instance URL (e.g., http://localhost:8080)", "url": "https://searxng.github.io/searxng/"}, ], }, + { + "name": "Brave Search (Free Tier)", + "badge": "free tier · search only", + "tag": "2,000 queries/mo free — search only (pair with any extract provider)", + "web_backend": "brave-free", + "env_vars": [ + {"key": "BRAVE_SEARCH_API_KEY", "prompt": "Brave Search subscription token", "url": "https://brave.com/search/api/"}, + ], + }, + { + "name": "DuckDuckGo (ddgs)", + "badge": "free · no key · search only", + "tag": "Search via the ddgs Python package — no API key (pair with any extract provider)", + "web_backend": "ddgs", + "env_vars": [], + "post_setup": "ddgs", + }, ], }, "image_gen": { @@ -669,6 +686,32 @@ def _run_post_setup(post_setup_key: str): _print_info(" Full voice list: https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/VOICES.md") _print_info(" Switch voices by setting tts.piper.voice in ~/.hermes/config.yaml") + elif post_setup_key == "ddgs": + try: + __import__("ddgs") + _print_success(" ddgs is already installed") + except 
ImportError: + import subprocess + _print_info(" Installing ddgs (DuckDuckGo search package)...") + try: + result = subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", "ddgs", "--quiet"], + capture_output=True, text=True, timeout=300, + ) + if result.returncode == 0: + _print_success(" ddgs installed") + else: + _print_warning(" ddgs install failed:") + _print_info(f" {result.stderr.strip()[:300]}") + _print_info(" Run manually: python -m pip install -U ddgs") + return + except subprocess.TimeoutExpired: + _print_warning(" ddgs install timed out (>5min)") + _print_info(" Run manually: python -m pip install -U ddgs") + return + _print_info(" No API key required. DuckDuckGo enforces server-side rate limits.") + _print_info(" Pair with an extract provider if you also need web_extract.") + elif post_setup_key == "spotify": # Run the full `hermes auth spotify` flow — if the user has no # client_id yet, this drops them into the interactive wizard diff --git a/scripts/release.py b/scripts/release.py index cc1ef9975d..74a4129cab 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -55,6 +55,7 @@ AUTHOR_MAP = { "127238744+teknium1@users.noreply.github.com": "teknium1", "128259593+Gutslabs@users.noreply.github.com": "Gutslabs", "50326054+nocturnum91@users.noreply.github.com": "nocturnum91", + "223003280+Abd0r@users.noreply.github.com": "Abd0r", "abdielv@proton.me": "AJV20", "mason@growagainorchids.com": "masonjames", "am@studio1.tailb672fe.ts.net": "subtract0", diff --git a/tests/tools/test_web_providers_brave_free.py b/tests/tools/test_web_providers_brave_free.py new file mode 100644 index 0000000000..36fe41640e --- /dev/null +++ b/tests/tools/test_web_providers_brave_free.py @@ -0,0 +1,275 @@ +"""Tests for the Brave Search (free tier) web search provider. 
+ +Covers: +- BraveFreeSearchProvider.is_configured() env var gating +- BraveFreeSearchProvider.search() — happy path, HTTP error, request error, bad JSON +- Result normalization (title, url, description, position) +- Limit truncation + Brave's count cap (20) +- _is_backend_available("brave-free") integration +- _get_backend() recognizes "brave-free" as a valid configured backend +- check_web_api_key() includes brave-free in availability check +- web_extract / web_crawl return search-only errors when brave-free is active +""" +from __future__ import annotations + +import json +from unittest.mock import MagicMock, patch + + +# --------------------------------------------------------------------------- +# BraveFreeSearchProvider unit tests +# --------------------------------------------------------------------------- + + +class TestBraveFreeProviderIsConfigured: + def test_configured_when_key_set(self, monkeypatch): + monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123") + from tools.web_providers.brave_free import BraveFreeSearchProvider + assert BraveFreeSearchProvider().is_configured() is True + + def test_not_configured_when_key_missing(self, monkeypatch): + monkeypatch.delenv("BRAVE_SEARCH_API_KEY", raising=False) + from tools.web_providers.brave_free import BraveFreeSearchProvider + assert BraveFreeSearchProvider().is_configured() is False + + def test_not_configured_when_key_whitespace(self, monkeypatch): + monkeypatch.setenv("BRAVE_SEARCH_API_KEY", " ") + from tools.web_providers.brave_free import BraveFreeSearchProvider + assert BraveFreeSearchProvider().is_configured() is False + + def test_provider_name(self): + from tools.web_providers.brave_free import BraveFreeSearchProvider + assert BraveFreeSearchProvider().provider_name() == "brave-free" + + def test_implements_web_search_provider(self): + from tools.web_providers.base import WebSearchProvider + from tools.web_providers.brave_free import BraveFreeSearchProvider + assert 
issubclass(BraveFreeSearchProvider, WebSearchProvider) + + +class TestBraveFreeProviderSearch: + _SAMPLE_RESPONSE = { + "web": { + "results": [ + {"title": "A", "url": "https://a.example.com", "description": "desc A"}, + {"title": "B", "url": "https://b.example.com", "description": "desc B"}, + {"title": "C", "url": "https://c.example.com", "description": "desc C"}, + ] + } + } + + @staticmethod + def _mock_resp(json_data, status_code=200): + m = MagicMock() + m.status_code = status_code + m.json.return_value = json_data + m.raise_for_status = MagicMock() + return m + + def test_happy_path_normalizes_results(self, monkeypatch): + monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123") + from tools.web_providers.brave_free import BraveFreeSearchProvider + + with patch("httpx.get", return_value=self._mock_resp(self._SAMPLE_RESPONSE)): + result = BraveFreeSearchProvider().search("test query", limit=5) + + assert result["success"] is True + web = result["data"]["web"] + assert len(web) == 3 + assert web[0] == {"title": "A", "url": "https://a.example.com", "description": "desc A", "position": 1} + assert web[2]["position"] == 3 + + def test_sends_subscription_token_header_and_count(self, monkeypatch): + """Brave uses X-Subscription-Token; count maps from limit.""" + monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123") + from tools.web_providers.brave_free import BraveFreeSearchProvider + + captured = {} + + def fake_get(url, **kwargs): + captured["url"] = url + captured["headers"] = kwargs.get("headers", {}) + captured["params"] = kwargs.get("params", {}) + return self._mock_resp({"web": {"results": []}}) + + with patch("httpx.get", side_effect=fake_get): + BraveFreeSearchProvider().search("q", limit=5) + + assert captured["url"] == "https://api.search.brave.com/res/v1/web/search" + assert captured["headers"].get("X-Subscription-Token") == "BSAkey123" + assert captured["params"].get("q") == "q" + assert captured["params"].get("count") == 5 + + def 
test_count_is_capped_at_20(self, monkeypatch): + """Brave caps count at 20 — limit above that clamps.""" + monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123") + from tools.web_providers.brave_free import BraveFreeSearchProvider + + captured = {} + + def fake_get(url, **kwargs): + captured["params"] = kwargs.get("params", {}) + return self._mock_resp({"web": {"results": []}}) + + with patch("httpx.get", side_effect=fake_get): + BraveFreeSearchProvider().search("q", limit=100) + + assert captured["params"].get("count") == 20 + + def test_limit_is_respected_client_side(self, monkeypatch): + monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123") + from tools.web_providers.brave_free import BraveFreeSearchProvider + + with patch("httpx.get", return_value=self._mock_resp(self._SAMPLE_RESPONSE)): + result = BraveFreeSearchProvider().search("q", limit=2) + + assert result["success"] is True + assert len(result["data"]["web"]) == 2 + + def test_empty_results(self, monkeypatch): + monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123") + from tools.web_providers.brave_free import BraveFreeSearchProvider + + with patch("httpx.get", return_value=self._mock_resp({"web": {"results": []}})): + result = BraveFreeSearchProvider().search("nothing", limit=5) + + assert result["success"] is True + assert result["data"]["web"] == [] + + def test_missing_web_key_returns_empty(self, monkeypatch): + """Responses without a ``web`` block should produce an empty result set, not crash.""" + monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123") + from tools.web_providers.brave_free import BraveFreeSearchProvider + + with patch("httpx.get", return_value=self._mock_resp({})): + result = BraveFreeSearchProvider().search("q", limit=5) + + assert result["success"] is True + assert result["data"]["web"] == [] + + def test_http_error_returns_failure(self, monkeypatch): + import httpx + monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123") + from tools.web_providers.brave_free import 
BraveFreeSearchProvider + + bad = MagicMock() + bad.status_code = 429 + err = httpx.HTTPStatusError("429", request=MagicMock(), response=bad) + + with patch("httpx.get", side_effect=err): + result = BraveFreeSearchProvider().search("q", limit=5) + + assert result["success"] is False + assert "429" in result["error"] + + def test_request_error_returns_failure(self, monkeypatch): + import httpx + monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123") + from tools.web_providers.brave_free import BraveFreeSearchProvider + + with patch("httpx.get", side_effect=httpx.RequestError("boom")): + result = BraveFreeSearchProvider().search("q", limit=5) + + assert result["success"] is False + assert "boom" in result["error"] or "Brave" in result["error"] + + def test_missing_key_returns_failure(self, monkeypatch): + monkeypatch.delenv("BRAVE_SEARCH_API_KEY", raising=False) + from tools.web_providers.brave_free import BraveFreeSearchProvider + + result = BraveFreeSearchProvider().search("q", limit=5) + assert result["success"] is False + assert "BRAVE_SEARCH_API_KEY" in result["error"] + + +# --------------------------------------------------------------------------- +# Integration: _is_backend_available / _get_backend / check_web_api_key +# --------------------------------------------------------------------------- + + +class TestBraveFreeBackendWiring: + def test_is_backend_available_true_when_key_set(self, monkeypatch): + monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123") + from tools.web_tools import _is_backend_available + assert _is_backend_available("brave-free") is True + + def test_is_backend_available_false_when_key_missing(self, monkeypatch): + monkeypatch.delenv("BRAVE_SEARCH_API_KEY", raising=False) + from tools.web_tools import _is_backend_available + assert _is_backend_available("brave-free") is False + + def test_configured_backend_accepted(self, monkeypatch): + from tools import web_tools + monkeypatch.setattr(web_tools, "_load_web_config", lambda: 
class TestBraveFreeSearchOnlyErrors:
    """brave-free is search-only: extract and crawl must fail with a clear error."""

    @staticmethod
    def _activate_brave_free(monkeypatch):
        """Force ``brave-free`` as the configured backend and return ``web_tools``.

        Patches the web config loader, sets a dummy subscription token,
        disables the managed tool gateway and neutralizes the interrupt check
        so the tool coroutine reaches the backend dispatch.
        """
        from tools import web_tools

        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "brave-free"})
        monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
        monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False)
        return web_tools

    @staticmethod
    def _assert_search_only_error(result_str):
        """Assert that the JSON tool output is a Brave-specific search-only failure."""
        result = json.loads(result_str)
        assert result["success"] is False
        assert "search-only" in result["error"].lower()
        assert "brave" in result["error"].lower()

    def test_web_extract_returns_search_only_error(self, monkeypatch):
        import asyncio

        web_tools = self._activate_brave_free(monkeypatch)

        # asyncio.run() creates and closes its own loop, so the test does not
        # depend on a pre-existing (possibly closed or missing) current loop.
        result_str = asyncio.run(web_tools.web_extract_tool(["https://example.com"]))

        self._assert_search_only_error(result_str)

    def test_web_crawl_returns_search_only_error(self, monkeypatch):
        import asyncio

        web_tools = self._activate_brave_free(monkeypatch)
        # Crawl may otherwise be routed to Firecrawl; make sure it is reported
        # as unavailable so the search-only branch is exercised.
        monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)

        result_str = asyncio.run(web_tools.web_crawl_tool("https://example.com"))

        self._assert_search_only_error(result_str)
def _install_fake_ddgs(monkeypatch, *, text_results=None, text_raises=None):
    """Register a stand-in ``ddgs`` module in ``sys.modules`` for one test.

    ``text_results``: iterable of dicts that ``DDGS().text(...)`` yields.
    ``text_raises``: when given, ``DDGS().text(...)`` raises this exception
    instead of yielding anything.

    Returns the stub module that was installed.
    """

    class _StubClient:
        """Context-manager double exposing the ``text`` method of ``ddgs.DDGS``."""

        def __enter__(self):
            return self

        def __exit__(self, *_exc_info):
            # Falsy return: exceptions raised inside the ``with`` body propagate.
            return False

        def text(self, query, max_results=5):
            # Generator on purpose: the configured error surfaces on first
            # iteration, and ``max_results`` is deliberately ignored so callers'
            # own result capping can be exercised.
            if text_raises is not None:
                raise text_raises
            yield from text_results or []

    stub_module = types.ModuleType("ddgs")
    stub_module.DDGS = _StubClient
    monkeypatch.setitem(sys.modules, "ddgs", stub_module)
    return stub_module
raises ImportError even if the package is actually installed + import builtins + orig_import = builtins.__import__ + + def blocked_import(name, *args, **kwargs): + if name == "ddgs": + raise ImportError("blocked for test") + return orig_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", blocked_import) + from tools.web_providers.ddgs import DDGSSearchProvider + assert DDGSSearchProvider().is_configured() is False + + def test_provider_name(self): + from tools.web_providers.ddgs import DDGSSearchProvider + assert DDGSSearchProvider().provider_name() == "ddgs" + + def test_implements_web_search_provider(self): + from tools.web_providers.base import WebSearchProvider + from tools.web_providers.ddgs import DDGSSearchProvider + assert issubclass(DDGSSearchProvider, WebSearchProvider) + + +class TestDDGSProviderSearch: + def test_happy_path_normalizes_results(self, monkeypatch): + _install_fake_ddgs(monkeypatch, text_results=[ + {"title": "A", "href": "https://a.example.com", "body": "desc A"}, + {"title": "B", "href": "https://b.example.com", "body": "desc B"}, + {"title": "C", "href": "https://c.example.com", "body": "desc C"}, + ]) + from tools.web_providers.ddgs import DDGSSearchProvider + + result = DDGSSearchProvider().search("q", limit=5) + + assert result["success"] is True + web = result["data"]["web"] + assert len(web) == 3 + assert web[0] == {"title": "A", "url": "https://a.example.com", "description": "desc A", "position": 1} + assert web[2]["position"] == 3 + + def test_accepts_url_key_as_fallback_for_href(self, monkeypatch): + _install_fake_ddgs(monkeypatch, text_results=[ + {"title": "A", "url": "https://a.example.com", "body": "desc A"}, + ]) + from tools.web_providers.ddgs import DDGSSearchProvider + + result = DDGSSearchProvider().search("q", limit=5) + + assert result["success"] is True + assert result["data"]["web"][0]["url"] == "https://a.example.com" + + def test_limit_is_respected(self, monkeypatch): + 
_install_fake_ddgs(monkeypatch, text_results=[ + {"title": f"R{i}", "href": f"https://r{i}.example.com", "body": ""} + for i in range(10) + ]) + from tools.web_providers.ddgs import DDGSSearchProvider + + result = DDGSSearchProvider().search("q", limit=3) + + assert result["success"] is True + assert len(result["data"]["web"]) == 3 + + def test_missing_package_returns_failure(self, monkeypatch): + monkeypatch.delitem(sys.modules, "ddgs", raising=False) + monkeypatch.delitem(sys.modules, "tools.web_providers.ddgs", raising=False) + import builtins + orig_import = builtins.__import__ + + def blocked_import(name, *args, **kwargs): + if name == "ddgs": + raise ImportError("blocked for test") + return orig_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", blocked_import) + from tools.web_providers.ddgs import DDGSSearchProvider + + result = DDGSSearchProvider().search("q", limit=5) + assert result["success"] is False + assert "ddgs" in result["error"].lower() + + def test_runtime_error_returns_failure(self, monkeypatch): + _install_fake_ddgs(monkeypatch, text_raises=RuntimeError("rate limited 202")) + from tools.web_providers.ddgs import DDGSSearchProvider + + result = DDGSSearchProvider().search("q", limit=5) + assert result["success"] is False + assert "rate limited" in result["error"] or "failed" in result["error"].lower() + + def test_empty_results(self, monkeypatch): + _install_fake_ddgs(monkeypatch, text_results=[]) + from tools.web_providers.ddgs import DDGSSearchProvider + + result = DDGSSearchProvider().search("nothing", limit=5) + assert result["success"] is True + assert result["data"]["web"] == [] + + +# --------------------------------------------------------------------------- +# Integration: _is_backend_available / _get_backend / check_web_api_key +# --------------------------------------------------------------------------- + + +class TestDDGSBackendWiring: + def test_is_backend_available_true_when_package_importable(self, 
monkeypatch): + from tools import web_tools + monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True) + assert web_tools._is_backend_available("ddgs") is True + + def test_is_backend_available_false_when_package_missing(self, monkeypatch): + from tools import web_tools + monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: False) + assert web_tools._is_backend_available("ddgs") is False + + def test_configured_backend_accepted(self, monkeypatch): + from tools import web_tools + monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "ddgs"}) + monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True) + assert web_tools._get_backend() == "ddgs" + + def test_ddgs_trails_paid_providers_in_auto_detect(self, monkeypatch): + """Exa (priority) should win over ddgs in auto-detect.""" + from tools import web_tools + monkeypatch.setattr(web_tools, "_load_web_config", lambda: {}) + for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "PARALLEL_API_KEY", + "TAVILY_API_KEY", "SEARXNG_URL", "BRAVE_SEARCH_API_KEY"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("EXA_API_KEY", "exa-key") + monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False) + monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True) + assert web_tools._get_backend() == "exa" + + def test_auto_detect_picks_ddgs_as_last_resort(self, monkeypatch): + from tools import web_tools + monkeypatch.setattr(web_tools, "_load_web_config", lambda: {}) + for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "PARALLEL_API_KEY", + "TAVILY_API_KEY", "EXA_API_KEY", "SEARXNG_URL", "BRAVE_SEARCH_API_KEY"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False) + monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True) + assert web_tools._get_backend() == "ddgs" + + def test_check_web_api_key_true_when_ddgs_configured(self, monkeypatch): + from tools 
import web_tools + monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "ddgs"}) + monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True) + assert web_tools.check_web_api_key() is True + + +# --------------------------------------------------------------------------- +# ddgs is search-only: web_extract / web_crawl return clear errors +# --------------------------------------------------------------------------- + + +class TestDDGSSearchOnlyErrors: + def test_web_extract_returns_search_only_error(self, monkeypatch): + import asyncio + from tools import web_tools + + monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "ddgs"}) + monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True) + monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False) + monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False) + + result_str = asyncio.get_event_loop().run_until_complete( + web_tools.web_extract_tool(["https://example.com"]) + ) + result = json.loads(result_str) + assert result["success"] is False + assert "search-only" in result["error"].lower() + assert "duckduckgo" in result["error"].lower() or "ddgs" in result["error"].lower() + + def test_web_crawl_returns_search_only_error(self, monkeypatch): + import asyncio + from tools import web_tools + + monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "ddgs"}) + monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True) + monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False) + monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False) + monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False) + + result_str = asyncio.get_event_loop().run_until_complete( + web_tools.web_crawl_tool("https://example.com") + ) + result = json.loads(result_str) + assert result["success"] is False + assert "search-only" in result["error"].lower() + 
class BraveFreeSearchProvider(WebSearchProvider):
    """Search via the Brave Search API (free tier).

    Requires ``BRAVE_SEARCH_API_KEY`` to be set. The value is passed as the
    ``X-Subscription-Token`` header. No extract capability — pair with
    Firecrawl/Tavily/Exa/Parallel when you also need ``web_extract``.
    """

    def provider_name(self) -> str:
        """Return the provider identifier, ``"brave-free"``."""
        return "brave-free"

    def is_configured(self) -> bool:
        """Return True when ``BRAVE_SEARCH_API_KEY`` is set to a non-blank value."""
        return bool(os.getenv("BRAVE_SEARCH_API_KEY", "").strip())

    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Execute a search against the Brave Search API.

        Args:
            query: Search terms, sent as the ``q`` parameter.
            limit: Maximum number of results wanted. Clamped to ``1..20``; the
                same clamped value is sent as ``count`` and used to truncate
                the response.

        Returns normalized results::

            {
                "success": True,
                "data": {
                    "web": [
                        {
                            "title": str,
                            "url": str,
                            "description": str,
                            "position": int,
                        },
                        ...
                    ]
                }
            }

        On failure (missing key, non-numeric ``limit``, HTTP or transport
        error, unparseable body) returns ``{"success": False, "error": str}``
        instead of raising.
        """
        import httpx

        api_key = os.getenv("BRAVE_SEARCH_API_KEY", "").strip()
        if not api_key:
            return {"success": False, "error": "BRAVE_SEARCH_API_KEY is not set"}

        try:
            requested = int(limit)
        except (TypeError, ValueError):
            return {"success": False, "error": f"Invalid search limit: {limit!r}"}

        # Brave's `count` is capped at 20.
        count = max(1, min(requested, 20))

        try:
            resp = httpx.get(
                _BRAVE_ENDPOINT,
                params={"q": query, "count": count},
                headers={
                    "X-Subscription-Token": api_key,
                    "Accept": "application/json",
                },
                timeout=15,
            )
            resp.raise_for_status()
        except httpx.HTTPStatusError as exc:
            logger.warning("Brave Search HTTP error: %s", exc)
            return {
                "success": False,
                "error": f"Brave Search returned HTTP {exc.response.status_code}",
            }
        except httpx.RequestError as exc:
            logger.warning("Brave Search request error: %s", exc)
            return {"success": False, "error": f"Could not reach Brave Search: {exc}"}

        try:
            data = resp.json()
        except Exception as exc:  # noqa: BLE001 — any decode failure is a parse error here
            logger.warning("Brave Search response parse error: %s", exc)
            return {"success": False, "error": "Could not parse Brave Search response as JSON"}

        # Tolerate unexpected shapes: a missing or non-object ``web`` block and
        # non-dict rows yield an empty result set rather than an AttributeError.
        web_block = data.get("web") if isinstance(data, dict) else None
        raw_results = web_block.get("results") if isinstance(web_block, dict) else None
        if not isinstance(raw_results, list):
            raw_results = []

        # Truncate with the clamped count (not the raw ``limit``) so the
        # request and the returned list always agree.
        hits = [row for row in raw_results if isinstance(row, dict)][:count]

        web_results = [
            {
                # ``or ""`` keeps JSON nulls from turning into the string "None".
                "title": str(hit.get("title") or ""),
                "url": str(hit.get("url") or ""),
                "description": str(hit.get("description") or ""),
                "position": position,
            }
            for position, hit in enumerate(hits, start=1)
        ]

        logger.info(
            "Brave Search '%s': %d results (from %d raw, limit %d)",
            query,
            len(web_results),
            len(raw_results),
            count,
        )

        return {"success": True, "data": {"web": web_results}}
class DDGSSearchProvider(WebSearchProvider):
    """Search via the ``ddgs`` package (DuckDuckGo HTML scrape).

    No API key required. The provider is considered "configured" when the
    ``ddgs`` package is importable — there is nothing else to set up.
    """

    def provider_name(self) -> str:
        """Return the provider identifier, ``"ddgs"``."""
        return "ddgs"

    def is_configured(self) -> bool:
        """Return True when the ``ddgs`` package is importable.

        Called at tool-registration time; must not perform network I/O.
        """
        try:
            import ddgs  # noqa: F401
        except ImportError:
            return False
        return True

    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Execute a DuckDuckGo search and return normalized results.

        Args:
            query: Search terms passed to ``DDGS().text``.
            limit: Maximum number of results wanted; values below 1 are
                raised to 1.

        Returns ``{"success": True, "data": {"web": [...]}}`` on success or
        ``{"success": False, "error": str}`` on failure (missing package,
        non-numeric ``limit``, rate-limited, network error, etc.). Each entry
        of ``web`` has ``title``, ``url``, ``description`` and a 1-based
        ``position``.
        """
        from itertools import islice

        try:
            from ddgs import DDGS  # type: ignore
        except ImportError:
            return {
                "success": False,
                "error": "ddgs package is not installed — run `pip install ddgs`",
            }

        try:
            safe_limit = max(1, int(limit))
        except (TypeError, ValueError):
            return {"success": False, "error": f"Invalid search limit: {limit!r}"}

        try:
            web_results = []
            with DDGS() as client:
                # ``max_results`` is only a hint to the package; islice enforces
                # the cap on our side in case more hits come back.
                hits = islice(client.text(query, max_results=safe_limit), safe_limit)
                for position, hit in enumerate(hits, start=1):
                    web_results.append(
                        {
                            # ``or ""`` keeps None values from becoming "None".
                            "title": str(hit.get("title") or ""),
                            "url": str(hit.get("href") or hit.get("url") or ""),
                            "description": str(hit.get("body") or ""),
                            "position": position,
                        }
                    )
        except Exception as exc:  # noqa: BLE001 — ddgs raises its own exceptions
            logger.warning("DDGS search error: %s", exc)
            return {"success": False, "error": f"DuckDuckGo search failed: {exc}"}

        logger.info("DDGS search '%s': %d results (limit %d)", query, len(web_results), safe_limit)
        return {"success": True, "data": {"web": web_results}}
def _ddgs_package_importable() -> bool:
    """Report whether the ``ddgs`` Python package can be imported.

    Unlike the other web backends, ddgs availability depends on an installed
    package rather than an env var or config entry. Keeping the probe in one
    helper lets backend auto-detection and ``_is_backend_available`` share a
    single check, and gives tests one symbol to monkeypatch.

    Returns:
        True if importing ``ddgs`` succeeds, False if it raises ImportError.
    """
    try:
        __import__("ddgs")
    except ImportError:
        return False
    return True
" + "error": f"{_label} is a search-only backend and cannot extract URL content. " "Set web.extract_backend to firecrawl, tavily, exa, or parallel.", }, ensure_ascii=False) else: @@ -1732,10 +1776,11 @@ async def web_crawl_tool( _debug.save() return cleaned_result - # SearXNG is search-only — it cannot crawl - if backend == "searxng": + # SearXNG / Brave Search (free tier) / DuckDuckGo (ddgs) are search-only — they cannot crawl + if backend in ("searxng", "brave-free", "ddgs"): + _label = {"searxng": "SearXNG", "brave-free": "Brave Search (free tier)", "ddgs": "DuckDuckGo (ddgs)"}[backend] return json.dumps({ - "error": "SearXNG is a search-only backend and cannot crawl URLs. " + "error": f"{_label} is a search-only backend and cannot crawl URLs. " "Set FIRECRAWL_API_KEY for crawling, or use web_search instead.", "success": False, }, ensure_ascii=False) @@ -2035,9 +2080,12 @@ def check_firecrawl_api_key() -> bool: def check_web_api_key() -> bool: """Check whether the configured web backend is available.""" configured = _load_web_config().get("backend", "").lower().strip() - if configured in ("exa", "parallel", "firecrawl", "tavily", "searxng"): + if configured in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs"): return _is_backend_available(configured) - return any(_is_backend_available(backend) for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng")) + return any( + _is_backend_available(backend) + for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs") + ) def check_auxiliary_model() -> bool: @@ -2074,6 +2122,10 @@ if __name__ == "__main__": print(" Using Tavily API (https://tavily.com)") elif backend == "searxng": print(f" Using SearXNG (search only): {os.getenv('SEARXNG_URL', '').strip()}") + elif backend == "brave-free": + print(" Using Brave Search free tier (search only)") + elif backend == "ddgs": + print(" Using DuckDuckGo via ddgs package (search only)") else: if 
firecrawl_url_available: print(f" Using self-hosted Firecrawl: {os.getenv('FIRECRAWL_API_URL').strip().rstrip('/')}")