mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
feat(web): add Brave Search (free tier) and DDGS search providers
Both implement WebSearchProvider via tools/web_providers/ — matching the
existing SearXNG pattern (PR #5c906d702). Search-only; pair with any
extract provider via web.extract_backend.
- tools/web_providers/brave_free.py — Brave Search API (free tier, 2k
queries/mo). Uses BRAVE_SEARCH_API_KEY as X-Subscription-Token.
- tools/web_providers/ddgs.py — DuckDuckGo via the ddgs Python package.
No API key; gated on package importability.
- tools/web_tools.py: both backends added to _get_backend() config list
and auto-detect chain (trails paid providers), _is_backend_available,
web_search_tool dispatch, web_extract_tool + web_crawl_tool search-only
refusals, check_web_api_key, and the __main__ diagnostic. Introduces
_ddgs_package_importable() helper so tests can monkeypatch a single
symbol for the ddgs availability check.
- hermes_cli/tools_config.py: picker entries for both providers; ddgs
gets a post_setup handler that runs `pip install ddgs`.
- hermes_cli/config.py: BRAVE_SEARCH_API_KEY in OPTIONAL_ENV_VARS.
- scripts/release.py: AUTHOR_MAP entry for @Abd0r.
- tests: 14 new tests (brave-free) + 15 new tests (ddgs) covering
provider unit behavior, backend wiring, and search-only refusals.
Salvages the brave-free + ddgs portion of PR #19796. Not included: the
in-line helpers in web_tools.py (replaced with provider modules to match
the shipped architecture), the lynx-based extract path (these backends
should refuse extract with a clear error — users pair with a real
extract provider), and scripts/start-llama-server.sh (unrelated).
Co-authored-by: Abd0r <223003280+Abd0r@users.noreply.github.com>
This commit is contained in:
parent
cdc0a47dd5
commit
04193cf71c
8 changed files with 862 additions and 9 deletions
|
|
@ -1864,6 +1864,14 @@ OPTIONAL_ENV_VARS = {
|
||||||
"password": False,
|
"password": False,
|
||||||
"category": "tool",
|
"category": "tool",
|
||||||
},
|
},
|
||||||
|
"BRAVE_SEARCH_API_KEY": {
|
||||||
|
"description": "Brave Search API subscription token (free tier: 2,000 queries/mo)",
|
||||||
|
"prompt": "Brave Search subscription token",
|
||||||
|
"url": "https://brave.com/search/api/",
|
||||||
|
"tools": ["web_search"],
|
||||||
|
"password": True,
|
||||||
|
"category": "tool",
|
||||||
|
},
|
||||||
"BROWSERBASE_API_KEY": {
|
"BROWSERBASE_API_KEY": {
|
||||||
"description": "Browserbase API key for cloud browser (optional — local browser works without this)",
|
"description": "Browserbase API key for cloud browser (optional — local browser works without this)",
|
||||||
"prompt": "Browserbase API key",
|
"prompt": "Browserbase API key",
|
||||||
|
|
|
||||||
|
|
@ -308,6 +308,23 @@ TOOL_CATEGORIES = {
|
||||||
{"key": "SEARXNG_URL", "prompt": "Your SearXNG instance URL (e.g., http://localhost:8080)", "url": "https://searxng.github.io/searxng/"},
|
{"key": "SEARXNG_URL", "prompt": "Your SearXNG instance URL (e.g., http://localhost:8080)", "url": "https://searxng.github.io/searxng/"},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "Brave Search (Free Tier)",
|
||||||
|
"badge": "free tier · search only",
|
||||||
|
"tag": "2,000 queries/mo free — search only (pair with any extract provider)",
|
||||||
|
"web_backend": "brave-free",
|
||||||
|
"env_vars": [
|
||||||
|
{"key": "BRAVE_SEARCH_API_KEY", "prompt": "Brave Search subscription token", "url": "https://brave.com/search/api/"},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "DuckDuckGo (ddgs)",
|
||||||
|
"badge": "free · no key · search only",
|
||||||
|
"tag": "Search via the ddgs Python package — no API key (pair with any extract provider)",
|
||||||
|
"web_backend": "ddgs",
|
||||||
|
"env_vars": [],
|
||||||
|
"post_setup": "ddgs",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
"image_gen": {
|
"image_gen": {
|
||||||
|
|
@ -669,6 +686,32 @@ def _run_post_setup(post_setup_key: str):
|
||||||
_print_info(" Full voice list: https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/VOICES.md")
|
_print_info(" Full voice list: https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/VOICES.md")
|
||||||
_print_info(" Switch voices by setting tts.piper.voice in ~/.hermes/config.yaml")
|
_print_info(" Switch voices by setting tts.piper.voice in ~/.hermes/config.yaml")
|
||||||
|
|
||||||
|
elif post_setup_key == "ddgs":
|
||||||
|
try:
|
||||||
|
__import__("ddgs")
|
||||||
|
_print_success(" ddgs is already installed")
|
||||||
|
except ImportError:
|
||||||
|
import subprocess
|
||||||
|
_print_info(" Installing ddgs (DuckDuckGo search package)...")
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
[sys.executable, "-m", "pip", "install", "-U", "ddgs", "--quiet"],
|
||||||
|
capture_output=True, text=True, timeout=300,
|
||||||
|
)
|
||||||
|
if result.returncode == 0:
|
||||||
|
_print_success(" ddgs installed")
|
||||||
|
else:
|
||||||
|
_print_warning(" ddgs install failed:")
|
||||||
|
_print_info(f" {result.stderr.strip()[:300]}")
|
||||||
|
_print_info(" Run manually: python -m pip install -U ddgs")
|
||||||
|
return
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
_print_warning(" ddgs install timed out (>5min)")
|
||||||
|
_print_info(" Run manually: python -m pip install -U ddgs")
|
||||||
|
return
|
||||||
|
_print_info(" No API key required. DuckDuckGo enforces server-side rate limits.")
|
||||||
|
_print_info(" Pair with an extract provider if you also need web_extract.")
|
||||||
|
|
||||||
elif post_setup_key == "spotify":
|
elif post_setup_key == "spotify":
|
||||||
# Run the full `hermes auth spotify` flow — if the user has no
|
# Run the full `hermes auth spotify` flow — if the user has no
|
||||||
# client_id yet, this drops them into the interactive wizard
|
# client_id yet, this drops them into the interactive wizard
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,7 @@ AUTHOR_MAP = {
|
||||||
"127238744+teknium1@users.noreply.github.com": "teknium1",
|
"127238744+teknium1@users.noreply.github.com": "teknium1",
|
||||||
"128259593+Gutslabs@users.noreply.github.com": "Gutslabs",
|
"128259593+Gutslabs@users.noreply.github.com": "Gutslabs",
|
||||||
"50326054+nocturnum91@users.noreply.github.com": "nocturnum91",
|
"50326054+nocturnum91@users.noreply.github.com": "nocturnum91",
|
||||||
|
"223003280+Abd0r@users.noreply.github.com": "Abd0r",
|
||||||
"abdielv@proton.me": "AJV20",
|
"abdielv@proton.me": "AJV20",
|
||||||
"mason@growagainorchids.com": "masonjames",
|
"mason@growagainorchids.com": "masonjames",
|
||||||
"am@studio1.tailb672fe.ts.net": "subtract0",
|
"am@studio1.tailb672fe.ts.net": "subtract0",
|
||||||
|
|
|
||||||
275
tests/tools/test_web_providers_brave_free.py
Normal file
275
tests/tools/test_web_providers_brave_free.py
Normal file
|
|
@ -0,0 +1,275 @@
|
||||||
|
"""Tests for the Brave Search (free tier) web search provider.
|
||||||
|
|
||||||
|
Covers:
|
||||||
|
- BraveFreeSearchProvider.is_configured() env var gating
|
||||||
|
- BraveFreeSearchProvider.search() — happy path, HTTP error, request error, bad JSON
|
||||||
|
- Result normalization (title, url, description, position)
|
||||||
|
- Limit truncation + Brave's count cap (20)
|
||||||
|
- _is_backend_available("brave-free") integration
|
||||||
|
- _get_backend() recognizes "brave-free" as a valid configured backend
|
||||||
|
- check_web_api_key() includes brave-free in availability check
|
||||||
|
- web_extract / web_crawl return search-only errors when brave-free is active
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# BraveFreeSearchProvider unit tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBraveFreeProviderIsConfigured:
|
||||||
|
def test_configured_when_key_set(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
assert BraveFreeSearchProvider().is_configured() is True
|
||||||
|
|
||||||
|
def test_not_configured_when_key_missing(self, monkeypatch):
|
||||||
|
monkeypatch.delenv("BRAVE_SEARCH_API_KEY", raising=False)
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
assert BraveFreeSearchProvider().is_configured() is False
|
||||||
|
|
||||||
|
def test_not_configured_when_key_whitespace(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", " ")
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
assert BraveFreeSearchProvider().is_configured() is False
|
||||||
|
|
||||||
|
def test_provider_name(self):
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
assert BraveFreeSearchProvider().provider_name() == "brave-free"
|
||||||
|
|
||||||
|
def test_implements_web_search_provider(self):
|
||||||
|
from tools.web_providers.base import WebSearchProvider
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
assert issubclass(BraveFreeSearchProvider, WebSearchProvider)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBraveFreeProviderSearch:
|
||||||
|
_SAMPLE_RESPONSE = {
|
||||||
|
"web": {
|
||||||
|
"results": [
|
||||||
|
{"title": "A", "url": "https://a.example.com", "description": "desc A"},
|
||||||
|
{"title": "B", "url": "https://b.example.com", "description": "desc B"},
|
||||||
|
{"title": "C", "url": "https://c.example.com", "description": "desc C"},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _mock_resp(json_data, status_code=200):
|
||||||
|
m = MagicMock()
|
||||||
|
m.status_code = status_code
|
||||||
|
m.json.return_value = json_data
|
||||||
|
m.raise_for_status = MagicMock()
|
||||||
|
return m
|
||||||
|
|
||||||
|
def test_happy_path_normalizes_results(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
|
||||||
|
with patch("httpx.get", return_value=self._mock_resp(self._SAMPLE_RESPONSE)):
|
||||||
|
result = BraveFreeSearchProvider().search("test query", limit=5)
|
||||||
|
|
||||||
|
assert result["success"] is True
|
||||||
|
web = result["data"]["web"]
|
||||||
|
assert len(web) == 3
|
||||||
|
assert web[0] == {"title": "A", "url": "https://a.example.com", "description": "desc A", "position": 1}
|
||||||
|
assert web[2]["position"] == 3
|
||||||
|
|
||||||
|
def test_sends_subscription_token_header_and_count(self, monkeypatch):
|
||||||
|
"""Brave uses X-Subscription-Token; count maps from limit."""
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
|
||||||
|
captured = {}
|
||||||
|
|
||||||
|
def fake_get(url, **kwargs):
|
||||||
|
captured["url"] = url
|
||||||
|
captured["headers"] = kwargs.get("headers", {})
|
||||||
|
captured["params"] = kwargs.get("params", {})
|
||||||
|
return self._mock_resp({"web": {"results": []}})
|
||||||
|
|
||||||
|
with patch("httpx.get", side_effect=fake_get):
|
||||||
|
BraveFreeSearchProvider().search("q", limit=5)
|
||||||
|
|
||||||
|
assert captured["url"] == "https://api.search.brave.com/res/v1/web/search"
|
||||||
|
assert captured["headers"].get("X-Subscription-Token") == "BSAkey123"
|
||||||
|
assert captured["params"].get("q") == "q"
|
||||||
|
assert captured["params"].get("count") == 5
|
||||||
|
|
||||||
|
def test_count_is_capped_at_20(self, monkeypatch):
|
||||||
|
"""Brave caps count at 20 — limit above that clamps."""
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
|
||||||
|
captured = {}
|
||||||
|
|
||||||
|
def fake_get(url, **kwargs):
|
||||||
|
captured["params"] = kwargs.get("params", {})
|
||||||
|
return self._mock_resp({"web": {"results": []}})
|
||||||
|
|
||||||
|
with patch("httpx.get", side_effect=fake_get):
|
||||||
|
BraveFreeSearchProvider().search("q", limit=100)
|
||||||
|
|
||||||
|
assert captured["params"].get("count") == 20
|
||||||
|
|
||||||
|
def test_limit_is_respected_client_side(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
|
||||||
|
with patch("httpx.get", return_value=self._mock_resp(self._SAMPLE_RESPONSE)):
|
||||||
|
result = BraveFreeSearchProvider().search("q", limit=2)
|
||||||
|
|
||||||
|
assert result["success"] is True
|
||||||
|
assert len(result["data"]["web"]) == 2
|
||||||
|
|
||||||
|
def test_empty_results(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
|
||||||
|
with patch("httpx.get", return_value=self._mock_resp({"web": {"results": []}})):
|
||||||
|
result = BraveFreeSearchProvider().search("nothing", limit=5)
|
||||||
|
|
||||||
|
assert result["success"] is True
|
||||||
|
assert result["data"]["web"] == []
|
||||||
|
|
||||||
|
def test_missing_web_key_returns_empty(self, monkeypatch):
|
||||||
|
"""Responses without a ``web`` block should produce an empty result set, not crash."""
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
|
||||||
|
with patch("httpx.get", return_value=self._mock_resp({})):
|
||||||
|
result = BraveFreeSearchProvider().search("q", limit=5)
|
||||||
|
|
||||||
|
assert result["success"] is True
|
||||||
|
assert result["data"]["web"] == []
|
||||||
|
|
||||||
|
def test_http_error_returns_failure(self, monkeypatch):
|
||||||
|
import httpx
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
|
||||||
|
bad = MagicMock()
|
||||||
|
bad.status_code = 429
|
||||||
|
err = httpx.HTTPStatusError("429", request=MagicMock(), response=bad)
|
||||||
|
|
||||||
|
with patch("httpx.get", side_effect=err):
|
||||||
|
result = BraveFreeSearchProvider().search("q", limit=5)
|
||||||
|
|
||||||
|
assert result["success"] is False
|
||||||
|
assert "429" in result["error"]
|
||||||
|
|
||||||
|
def test_request_error_returns_failure(self, monkeypatch):
|
||||||
|
import httpx
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
|
||||||
|
with patch("httpx.get", side_effect=httpx.RequestError("boom")):
|
||||||
|
result = BraveFreeSearchProvider().search("q", limit=5)
|
||||||
|
|
||||||
|
assert result["success"] is False
|
||||||
|
assert "boom" in result["error"] or "Brave" in result["error"]
|
||||||
|
|
||||||
|
def test_missing_key_returns_failure(self, monkeypatch):
|
||||||
|
monkeypatch.delenv("BRAVE_SEARCH_API_KEY", raising=False)
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
|
||||||
|
result = BraveFreeSearchProvider().search("q", limit=5)
|
||||||
|
assert result["success"] is False
|
||||||
|
assert "BRAVE_SEARCH_API_KEY" in result["error"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Integration: _is_backend_available / _get_backend / check_web_api_key
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBraveFreeBackendWiring:
|
||||||
|
def test_is_backend_available_true_when_key_set(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
from tools.web_tools import _is_backend_available
|
||||||
|
assert _is_backend_available("brave-free") is True
|
||||||
|
|
||||||
|
def test_is_backend_available_false_when_key_missing(self, monkeypatch):
|
||||||
|
monkeypatch.delenv("BRAVE_SEARCH_API_KEY", raising=False)
|
||||||
|
from tools.web_tools import _is_backend_available
|
||||||
|
assert _is_backend_available("brave-free") is False
|
||||||
|
|
||||||
|
def test_configured_backend_accepted(self, monkeypatch):
|
||||||
|
from tools import web_tools
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "brave-free"})
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
assert web_tools._get_backend() == "brave-free"
|
||||||
|
|
||||||
|
def test_auto_detect_picks_brave_free_when_only_key_set(self, monkeypatch):
|
||||||
|
from tools import web_tools
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
|
||||||
|
for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "PARALLEL_API_KEY",
|
||||||
|
"TAVILY_API_KEY", "EXA_API_KEY", "SEARXNG_URL"):
|
||||||
|
monkeypatch.delenv(key, raising=False)
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
|
||||||
|
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: False)
|
||||||
|
assert web_tools._get_backend() == "brave-free"
|
||||||
|
|
||||||
|
def test_brave_free_does_not_override_paid_provider(self, monkeypatch):
|
||||||
|
"""Tavily (higher priority) should win in auto-detect."""
|
||||||
|
from tools import web_tools
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
|
||||||
|
for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "PARALLEL_API_KEY", "EXA_API_KEY", "SEARXNG_URL"):
|
||||||
|
monkeypatch.delenv(key, raising=False)
|
||||||
|
monkeypatch.setenv("TAVILY_API_KEY", "tvly")
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
|
||||||
|
assert web_tools._get_backend() == "tavily"
|
||||||
|
|
||||||
|
def test_check_web_api_key_true_when_brave_free_configured(self, monkeypatch):
|
||||||
|
from tools import web_tools
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "brave-free"})
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
assert web_tools.check_web_api_key() is True
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# brave-free is search-only: web_extract / web_crawl return clear errors
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBraveFreeSearchOnlyErrors:
|
||||||
|
def test_web_extract_returns_search_only_error(self, monkeypatch):
|
||||||
|
import asyncio
|
||||||
|
from tools import web_tools
|
||||||
|
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "brave-free"})
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
|
||||||
|
monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False)
|
||||||
|
|
||||||
|
result_str = asyncio.get_event_loop().run_until_complete(
|
||||||
|
web_tools.web_extract_tool(["https://example.com"])
|
||||||
|
)
|
||||||
|
result = json.loads(result_str)
|
||||||
|
assert result["success"] is False
|
||||||
|
assert "search-only" in result["error"].lower()
|
||||||
|
assert "brave" in result["error"].lower()
|
||||||
|
|
||||||
|
def test_web_crawl_returns_search_only_error(self, monkeypatch):
|
||||||
|
import asyncio
|
||||||
|
from tools import web_tools
|
||||||
|
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "brave-free"})
|
||||||
|
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
|
||||||
|
monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
|
||||||
|
monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)
|
||||||
|
monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False)
|
||||||
|
|
||||||
|
result_str = asyncio.get_event_loop().run_until_complete(
|
||||||
|
web_tools.web_crawl_tool("https://example.com")
|
||||||
|
)
|
||||||
|
result = json.loads(result_str)
|
||||||
|
assert result["success"] is False
|
||||||
|
assert "search-only" in result["error"].lower()
|
||||||
|
assert "brave" in result["error"].lower()
|
||||||
246
tests/tools/test_web_providers_ddgs.py
Normal file
246
tests/tools/test_web_providers_ddgs.py
Normal file
|
|
@ -0,0 +1,246 @@
|
||||||
|
"""Tests for the DuckDuckGo (ddgs) web search provider.
|
||||||
|
|
||||||
|
Covers:
|
||||||
|
- DDGSSearchProvider.is_configured() — reflects package importability
|
||||||
|
- DDGSSearchProvider.search() — happy path, missing package, runtime error
|
||||||
|
- Result normalization (title, url, description, position)
|
||||||
|
- _is_backend_available("ddgs") / _get_backend() integration
|
||||||
|
- web_extract / web_crawl return search-only errors when ddgs is active
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
|
||||||
|
def _install_fake_ddgs(monkeypatch, *, text_results=None, text_raises=None):
|
||||||
|
"""Install a stub ``ddgs`` module in sys.modules for the duration of a test.
|
||||||
|
|
||||||
|
``text_results``: iterable of dicts to yield from DDGS().text(...).
|
||||||
|
``text_raises``: if set, DDGS().text raises this exception instead.
|
||||||
|
"""
|
||||||
|
fake = types.ModuleType("ddgs")
|
||||||
|
|
||||||
|
class _FakeDDGS:
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
def __exit__(self, *_a):
|
||||||
|
return False
|
||||||
|
def text(self, query, max_results=5):
|
||||||
|
if text_raises is not None:
|
||||||
|
raise text_raises
|
||||||
|
for hit in (text_results or []):
|
||||||
|
yield hit
|
||||||
|
|
||||||
|
fake.DDGS = _FakeDDGS
|
||||||
|
monkeypatch.setitem(sys.modules, "ddgs", fake)
|
||||||
|
return fake
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# DDGSSearchProvider unit tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestDDGSProviderIsConfigured:
|
||||||
|
def test_configured_when_package_importable(self, monkeypatch):
|
||||||
|
_install_fake_ddgs(monkeypatch)
|
||||||
|
# Drop any cached ``tools.web_providers.ddgs`` so is_configured re-imports ddgs fresh
|
||||||
|
monkeypatch.delitem(sys.modules, "tools.web_providers.ddgs", raising=False)
|
||||||
|
from tools.web_providers.ddgs import DDGSSearchProvider
|
||||||
|
assert DDGSSearchProvider().is_configured() is True
|
||||||
|
|
||||||
|
def test_not_configured_when_package_missing(self, monkeypatch):
|
||||||
|
monkeypatch.delitem(sys.modules, "ddgs", raising=False)
|
||||||
|
monkeypatch.delitem(sys.modules, "tools.web_providers.ddgs", raising=False)
|
||||||
|
# Block the import so ``import ddgs`` raises ImportError even if the package is actually installed
|
||||||
|
import builtins
|
||||||
|
orig_import = builtins.__import__
|
||||||
|
|
||||||
|
def blocked_import(name, *args, **kwargs):
|
||||||
|
if name == "ddgs":
|
||||||
|
raise ImportError("blocked for test")
|
||||||
|
return orig_import(name, *args, **kwargs)
|
||||||
|
|
||||||
|
monkeypatch.setattr(builtins, "__import__", blocked_import)
|
||||||
|
from tools.web_providers.ddgs import DDGSSearchProvider
|
||||||
|
assert DDGSSearchProvider().is_configured() is False
|
||||||
|
|
||||||
|
def test_provider_name(self):
|
||||||
|
from tools.web_providers.ddgs import DDGSSearchProvider
|
||||||
|
assert DDGSSearchProvider().provider_name() == "ddgs"
|
||||||
|
|
||||||
|
def test_implements_web_search_provider(self):
|
||||||
|
from tools.web_providers.base import WebSearchProvider
|
||||||
|
from tools.web_providers.ddgs import DDGSSearchProvider
|
||||||
|
assert issubclass(DDGSSearchProvider, WebSearchProvider)
|
||||||
|
|
||||||
|
|
||||||
|
class TestDDGSProviderSearch:
|
||||||
|
def test_happy_path_normalizes_results(self, monkeypatch):
|
||||||
|
_install_fake_ddgs(monkeypatch, text_results=[
|
||||||
|
{"title": "A", "href": "https://a.example.com", "body": "desc A"},
|
||||||
|
{"title": "B", "href": "https://b.example.com", "body": "desc B"},
|
||||||
|
{"title": "C", "href": "https://c.example.com", "body": "desc C"},
|
||||||
|
])
|
||||||
|
from tools.web_providers.ddgs import DDGSSearchProvider
|
||||||
|
|
||||||
|
result = DDGSSearchProvider().search("q", limit=5)
|
||||||
|
|
||||||
|
assert result["success"] is True
|
||||||
|
web = result["data"]["web"]
|
||||||
|
assert len(web) == 3
|
||||||
|
assert web[0] == {"title": "A", "url": "https://a.example.com", "description": "desc A", "position": 1}
|
||||||
|
assert web[2]["position"] == 3
|
||||||
|
|
||||||
|
def test_accepts_url_key_as_fallback_for_href(self, monkeypatch):
|
||||||
|
_install_fake_ddgs(monkeypatch, text_results=[
|
||||||
|
{"title": "A", "url": "https://a.example.com", "body": "desc A"},
|
||||||
|
])
|
||||||
|
from tools.web_providers.ddgs import DDGSSearchProvider
|
||||||
|
|
||||||
|
result = DDGSSearchProvider().search("q", limit=5)
|
||||||
|
|
||||||
|
assert result["success"] is True
|
||||||
|
assert result["data"]["web"][0]["url"] == "https://a.example.com"
|
||||||
|
|
||||||
|
def test_limit_is_respected(self, monkeypatch):
|
||||||
|
_install_fake_ddgs(monkeypatch, text_results=[
|
||||||
|
{"title": f"R{i}", "href": f"https://r{i}.example.com", "body": ""}
|
||||||
|
for i in range(10)
|
||||||
|
])
|
||||||
|
from tools.web_providers.ddgs import DDGSSearchProvider
|
||||||
|
|
||||||
|
result = DDGSSearchProvider().search("q", limit=3)
|
||||||
|
|
||||||
|
assert result["success"] is True
|
||||||
|
assert len(result["data"]["web"]) == 3
|
||||||
|
|
||||||
|
def test_missing_package_returns_failure(self, monkeypatch):
|
||||||
|
monkeypatch.delitem(sys.modules, "ddgs", raising=False)
|
||||||
|
monkeypatch.delitem(sys.modules, "tools.web_providers.ddgs", raising=False)
|
||||||
|
import builtins
|
||||||
|
orig_import = builtins.__import__
|
||||||
|
|
||||||
|
def blocked_import(name, *args, **kwargs):
|
||||||
|
if name == "ddgs":
|
||||||
|
raise ImportError("blocked for test")
|
||||||
|
return orig_import(name, *args, **kwargs)
|
||||||
|
|
||||||
|
monkeypatch.setattr(builtins, "__import__", blocked_import)
|
||||||
|
from tools.web_providers.ddgs import DDGSSearchProvider
|
||||||
|
|
||||||
|
result = DDGSSearchProvider().search("q", limit=5)
|
||||||
|
assert result["success"] is False
|
||||||
|
assert "ddgs" in result["error"].lower()
|
||||||
|
|
||||||
|
def test_runtime_error_returns_failure(self, monkeypatch):
|
||||||
|
_install_fake_ddgs(monkeypatch, text_raises=RuntimeError("rate limited 202"))
|
||||||
|
from tools.web_providers.ddgs import DDGSSearchProvider
|
||||||
|
|
||||||
|
result = DDGSSearchProvider().search("q", limit=5)
|
||||||
|
assert result["success"] is False
|
||||||
|
assert "rate limited" in result["error"] or "failed" in result["error"].lower()
|
||||||
|
|
||||||
|
def test_empty_results(self, monkeypatch):
|
||||||
|
_install_fake_ddgs(monkeypatch, text_results=[])
|
||||||
|
from tools.web_providers.ddgs import DDGSSearchProvider
|
||||||
|
|
||||||
|
result = DDGSSearchProvider().search("nothing", limit=5)
|
||||||
|
assert result["success"] is True
|
||||||
|
assert result["data"]["web"] == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Integration: _is_backend_available / _get_backend / check_web_api_key
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestDDGSBackendWiring:
|
||||||
|
def test_is_backend_available_true_when_package_importable(self, monkeypatch):
|
||||||
|
from tools import web_tools
|
||||||
|
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
|
||||||
|
assert web_tools._is_backend_available("ddgs") is True
|
||||||
|
|
||||||
|
def test_is_backend_available_false_when_package_missing(self, monkeypatch):
|
||||||
|
from tools import web_tools
|
||||||
|
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: False)
|
||||||
|
assert web_tools._is_backend_available("ddgs") is False
|
||||||
|
|
||||||
|
def test_configured_backend_accepted(self, monkeypatch):
|
||||||
|
from tools import web_tools
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "ddgs"})
|
||||||
|
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
|
||||||
|
assert web_tools._get_backend() == "ddgs"
|
||||||
|
|
||||||
|
def test_ddgs_trails_paid_providers_in_auto_detect(self, monkeypatch):
|
||||||
|
"""Exa (priority) should win over ddgs in auto-detect."""
|
||||||
|
from tools import web_tools
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
|
||||||
|
for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "PARALLEL_API_KEY",
|
||||||
|
"TAVILY_API_KEY", "SEARXNG_URL", "BRAVE_SEARCH_API_KEY"):
|
||||||
|
monkeypatch.delenv(key, raising=False)
|
||||||
|
monkeypatch.setenv("EXA_API_KEY", "exa-key")
|
||||||
|
monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
|
||||||
|
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
|
||||||
|
assert web_tools._get_backend() == "exa"
|
||||||
|
|
||||||
|
def test_auto_detect_picks_ddgs_as_last_resort(self, monkeypatch):
|
||||||
|
from tools import web_tools
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
|
||||||
|
for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "PARALLEL_API_KEY",
|
||||||
|
"TAVILY_API_KEY", "EXA_API_KEY", "SEARXNG_URL", "BRAVE_SEARCH_API_KEY"):
|
||||||
|
monkeypatch.delenv(key, raising=False)
|
||||||
|
monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
|
||||||
|
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
|
||||||
|
assert web_tools._get_backend() == "ddgs"
|
||||||
|
|
||||||
|
def test_check_web_api_key_true_when_ddgs_configured(self, monkeypatch):
|
||||||
|
from tools import web_tools
|
||||||
|
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "ddgs"})
|
||||||
|
monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
|
||||||
|
assert web_tools.check_web_api_key() is True
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# ddgs is search-only: web_extract / web_crawl return clear errors
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestDDGSSearchOnlyErrors:
    """The ddgs backend is search-only: web_extract / web_crawl must refuse.

    Each test wires ``web_tools`` up as if ddgs were the active backend,
    invokes the non-search tool, and asserts the JSON error payload names
    the backend and explains that it cannot extract/crawl.
    """

    def _patch_ddgs_backend(self, monkeypatch, web_tools):
        """Force backend resolution to ddgs with no gateway / paid fallbacks."""
        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "ddgs"})
        monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
        monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False)

    def test_web_extract_returns_search_only_error(self, monkeypatch):
        import asyncio

        from tools import web_tools

        self._patch_ddgs_backend(monkeypatch, web_tools)

        # asyncio.run creates (and tears down) its own event loop;
        # get_event_loop() without a running loop is deprecated since 3.10.
        result_str = asyncio.run(web_tools.web_extract_tool(["https://example.com"]))
        result = json.loads(result_str)
        assert result["success"] is False
        assert "search-only" in result["error"].lower()
        assert "duckduckgo" in result["error"].lower() or "ddgs" in result["error"].lower()

    def test_web_crawl_returns_search_only_error(self, monkeypatch):
        import asyncio

        from tools import web_tools

        self._patch_ddgs_backend(monkeypatch, web_tools)
        # web_crawl can fall back to Firecrawl when a key is present; ensure it is not.
        monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)

        result_str = asyncio.run(web_tools.web_crawl_tool("https://example.com"))
        result = json.loads(result_str)
        assert result["success"] is False
        assert "search-only" in result["error"].lower()
        assert "duckduckgo" in result["error"].lower() or "ddgs" in result["error"].lower()
|
||||||
130
tools/web_providers/brave_free.py
Normal file
130
tools/web_providers/brave_free.py
Normal file
|
|
@ -0,0 +1,130 @@
|
||||||
|
"""Brave Search web search provider (free tier).
|
||||||
|
|
||||||
|
Brave Search's Data-for-Search API offers a free tier (2,000 queries/mo at the
|
||||||
|
time of writing) after signing up at https://brave.com/search/api/. This
|
||||||
|
provider implements ``WebSearchProvider`` only — the Data-for-Search endpoint
|
||||||
|
returns search results, it does not extract/crawl arbitrary URLs.
|
||||||
|
|
||||||
|
Configuration::
|
||||||
|
|
||||||
|
# ~/.hermes/.env
|
||||||
|
BRAVE_SEARCH_API_KEY=your-subscription-token
|
||||||
|
|
||||||
|
# ~/.hermes/config.yaml
|
||||||
|
web:
|
||||||
|
search_backend: "brave-free"
|
||||||
|
extract_backend: "firecrawl" # pair with an extract provider if needed
|
||||||
|
|
||||||
|
The API uses the ``X-Subscription-Token`` header. Free-tier keys are rate
|
||||||
|
limited (1 qps) and capped at 2k queries/month; see the Brave dashboard for
|
||||||
|
current quotas.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from tools.web_providers.base import WebSearchProvider
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
|
||||||
|
|
||||||
|
|
||||||
|
class BraveFreeSearchProvider(WebSearchProvider):
    """Search via the Brave Search API (free tier).

    Requires ``BRAVE_SEARCH_API_KEY`` to be set. The value is passed as the
    ``X-Subscription-Token`` header. No extract capability — pair with
    Firecrawl/Tavily/Exa/Parallel when you also need ``web_extract``.
    """

    def provider_name(self) -> str:
        """Return the backend identifier used in config / dispatch ("brave-free")."""
        return "brave-free"

    def is_configured(self) -> bool:
        """Return True when ``BRAVE_SEARCH_API_KEY`` is set to a non-empty value."""
        return bool(os.getenv("BRAVE_SEARCH_API_KEY", "").strip())

    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Execute a search against the Brave Search API.

        Args:
            query: Search query string.
            limit: Maximum number of results to return (coerced to >= 1).

        Returns normalized results::

            {
                "success": True,
                "data": {
                    "web": [
                        {
                            "title": str,
                            "url": str,
                            "description": str,
                            "position": int,
                        },
                        ...
                    ]
                }
            }

        On failure returns ``{"success": False, "error": str}``.
        """
        import httpx

        api_key = os.getenv("BRAVE_SEARCH_API_KEY", "").strip()
        if not api_key:
            return {"success": False, "error": "BRAVE_SEARCH_API_KEY is not set"}

        # Normalize the limit once and reuse it for both the request and the
        # local truncation, so a non-positive `limit` can't spend a
        # quota-limited query and then slice the results away.
        safe_limit = max(1, int(limit))
        # Brave's `count` parameter is capped at 20.
        count = min(safe_limit, 20)

        try:
            resp = httpx.get(
                _BRAVE_ENDPOINT,
                params={"q": query, "count": count},
                headers={
                    "X-Subscription-Token": api_key,
                    "Accept": "application/json",
                },
                timeout=15,
            )
            resp.raise_for_status()
        except httpx.HTTPStatusError as exc:
            logger.warning("Brave Search HTTP error: %s", exc)
            return {
                "success": False,
                "error": f"Brave Search returned HTTP {exc.response.status_code}",
            }
        except httpx.RequestError as exc:
            logger.warning("Brave Search request error: %s", exc)
            return {"success": False, "error": f"Could not reach Brave Search: {exc}"}

        try:
            data = resp.json()
        except Exception as exc:  # noqa: BLE001
            logger.warning("Brave Search response parse error: %s", exc)
            return {"success": False, "error": "Could not parse Brave Search response as JSON"}

        # Defensive: `web` may be absent or null in edge-case responses.
        raw_results = (data.get("web") or {}).get("results", []) or []
        truncated = raw_results[:safe_limit]

        web_results = [
            {
                "title": str(r.get("title", "")),
                "url": str(r.get("url", "")),
                "description": str(r.get("description", "")),
                "position": i + 1,
            }
            for i, r in enumerate(truncated)
        ]

        logger.info(
            "Brave Search '%s': %d results (from %d raw, limit %d)",
            query,
            len(web_results),
            len(raw_results),
            limit,
        )

        return {"success": True, "data": {"web": web_results}}
|
||||||
98
tools/web_providers/ddgs.py
Normal file
98
tools/web_providers/ddgs.py
Normal file
|
|
@ -0,0 +1,98 @@
|
||||||
|
"""DuckDuckGo web search provider via the ``ddgs`` Python package.
|
||||||
|
|
||||||
|
DuckDuckGo does not provide an official programmatic search API. The
|
||||||
|
community-maintained `ddgs <https://pypi.org/project/ddgs/>`_ package (the
|
||||||
|
renamed successor of ``duckduckgo-search``) scrapes DuckDuckGo's HTML results
|
||||||
|
page and normalizes them. It implements ``WebSearchProvider`` only — there is
|
||||||
|
no extract capability.
|
||||||
|
|
||||||
|
Configuration::
|
||||||
|
|
||||||
|
# No API key required. Enable by installing the package and pointing the
|
||||||
|
# web backend at ddgs:
|
||||||
|
pip install ddgs
|
||||||
|
|
||||||
|
# ~/.hermes/config.yaml
|
||||||
|
web:
|
||||||
|
search_backend: "ddgs"
|
||||||
|
extract_backend: "firecrawl" # pair with an extract provider if needed
|
||||||
|
|
||||||
|
Rate limits are enforced server-side by DuckDuckGo. Expect intermittent
|
||||||
|
``DuckDuckGoSearchException`` / 202 responses under heavy use; this provider
|
||||||
|
surfaces them as ``{"success": False, "error": ...}`` rather than crashing
|
||||||
|
the tool call.
|
||||||
|
|
||||||
|
See https://duckduckgo.com/?q=duckduckgo+tos for terms of use.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from tools.web_providers.base import WebSearchProvider
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class DDGSSearchProvider(WebSearchProvider):
    """Search via the ``ddgs`` package (DuckDuckGo HTML scrape).

    No API key required. The provider is considered "configured" when the
    ``ddgs`` package is importable — there is nothing else to set up.
    """

    def provider_name(self) -> str:
        """Return the backend identifier used in config / dispatch ("ddgs")."""
        return "ddgs"

    def is_configured(self) -> bool:
        """Return True when the ``ddgs`` package is importable.

        Called at tool-registration time; must not perform network I/O.
        """
        try:
            import ddgs  # noqa: F401
        except ImportError:
            return False
        return True

    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Execute a DuckDuckGo search and return normalized results.

        Returns ``{"success": True, "data": {"web": [...]}}`` on success or
        ``{"success": False, "error": str}`` on failure (missing package,
        rate-limited, network error, etc.).
        """
        try:
            from ddgs import DDGS  # type: ignore
        except ImportError:
            return {
                "success": False,
                "error": "ddgs package is not installed — run `pip install ddgs`",
            }

        # DDGS().text yields at most `max_results` items; cap defensively in
        # case the package ignores the hint.
        safe_limit = max(1, int(limit))

        collected = []
        try:
            with DDGS() as client:
                hits = client.text(query, max_results=safe_limit)
                for position, hit in enumerate(hits, start=1):
                    if position > safe_limit:
                        break
                    link = str(hit.get("href") or hit.get("url") or "")
                    collected.append(
                        {
                            "title": str(hit.get("title", "")),
                            "url": link,
                            "description": str(hit.get("body", "")),
                            "position": position,
                        }
                    )
        except Exception as exc:  # noqa: BLE001 — ddgs raises its own exceptions
            logger.warning("DDGS search error: %s", exc)
            return {"success": False, "error": f"DuckDuckGo search failed: {exc}"}

        logger.info("DDGS search '%s': %d results (limit %d)", query, len(collected), limit)
        return {"success": True, "data": {"web": collected}}
|
||||||
|
|
@ -126,18 +126,22 @@ def _get_backend() -> str:
|
||||||
keys manually without running setup.
|
keys manually without running setup.
|
||||||
"""
|
"""
|
||||||
configured = (_load_web_config().get("backend") or "").lower().strip()
|
configured = (_load_web_config().get("backend") or "").lower().strip()
|
||||||
if configured in ("parallel", "firecrawl", "tavily", "exa", "searxng"):
|
if configured in ("parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs"):
|
||||||
return configured
|
return configured
|
||||||
|
|
||||||
# Fallback for manual / legacy config — pick the highest-priority
|
# Fallback for manual / legacy config — pick the highest-priority
|
||||||
# available backend. Firecrawl also counts as available when the managed
|
# available backend. Firecrawl also counts as available when the managed
|
||||||
# tool gateway is configured for Nous subscribers.
|
# tool gateway is configured for Nous subscribers.
|
||||||
|
# Free-tier backends (searxng / brave-free / ddgs) trail the paid ones so
|
||||||
|
# existing paid setups are unaffected.
|
||||||
backend_candidates = (
|
backend_candidates = (
|
||||||
("firecrawl", _has_env("FIRECRAWL_API_KEY") or _has_env("FIRECRAWL_API_URL") or _is_tool_gateway_ready()),
|
("firecrawl", _has_env("FIRECRAWL_API_KEY") or _has_env("FIRECRAWL_API_URL") or _is_tool_gateway_ready()),
|
||||||
("parallel", _has_env("PARALLEL_API_KEY")),
|
("parallel", _has_env("PARALLEL_API_KEY")),
|
||||||
("tavily", _has_env("TAVILY_API_KEY")),
|
("tavily", _has_env("TAVILY_API_KEY")),
|
||||||
("exa", _has_env("EXA_API_KEY")),
|
("exa", _has_env("EXA_API_KEY")),
|
||||||
("searxng", _has_env("SEARXNG_URL")),
|
("searxng", _has_env("SEARXNG_URL")),
|
||||||
|
("brave-free", _has_env("BRAVE_SEARCH_API_KEY")),
|
||||||
|
("ddgs", _ddgs_package_importable()),
|
||||||
)
|
)
|
||||||
for backend, available in backend_candidates:
|
for backend, available in backend_candidates:
|
||||||
if available:
|
if available:
|
||||||
|
|
@ -196,8 +200,27 @@ def _is_backend_available(backend: str) -> bool:
|
||||||
return _has_env("TAVILY_API_KEY")
|
return _has_env("TAVILY_API_KEY")
|
||||||
if backend == "searxng":
|
if backend == "searxng":
|
||||||
return _has_env("SEARXNG_URL")
|
return _has_env("SEARXNG_URL")
|
||||||
|
if backend == "brave-free":
|
||||||
|
return _has_env("BRAVE_SEARCH_API_KEY")
|
||||||
|
if backend == "ddgs":
|
||||||
|
return _ddgs_package_importable()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _ddgs_package_importable() -> bool:
|
||||||
|
"""Return True when the ``ddgs`` Python package can be imported.
|
||||||
|
|
||||||
|
ddgs is the only backend whose availability is driven by a package
|
||||||
|
presence rather than an env var / config entry. Wrapped in a helper
|
||||||
|
so auto-detect and ``_is_backend_available`` share the same check
|
||||||
|
(and tests can monkeypatch a single symbol).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
import ddgs # noqa: F401
|
||||||
|
return True
|
||||||
|
except ImportError:
|
||||||
|
return False
|
||||||
|
|
||||||
# ─── Firecrawl Client ────────────────────────────────────────────────────────
|
# ─── Firecrawl Client ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
_firecrawl_client = None
|
_firecrawl_client = None
|
||||||
|
|
@ -1200,6 +1223,26 @@ def web_search_tool(query: str, limit: int = 5) -> str:
|
||||||
_debug.save()
|
_debug.save()
|
||||||
return result_json
|
return result_json
|
||||||
|
|
||||||
|
if backend == "brave-free":
|
||||||
|
from tools.web_providers.brave_free import BraveFreeSearchProvider
|
||||||
|
response_data = BraveFreeSearchProvider().search(query, limit)
|
||||||
|
debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
|
||||||
|
result_json = json.dumps(response_data, indent=2, ensure_ascii=False)
|
||||||
|
debug_call_data["final_response_size"] = len(result_json)
|
||||||
|
_debug.log_call("web_search_tool", debug_call_data)
|
||||||
|
_debug.save()
|
||||||
|
return result_json
|
||||||
|
|
||||||
|
if backend == "ddgs":
|
||||||
|
from tools.web_providers.ddgs import DDGSSearchProvider
|
||||||
|
response_data = DDGSSearchProvider().search(query, limit)
|
||||||
|
debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
|
||||||
|
result_json = json.dumps(response_data, indent=2, ensure_ascii=False)
|
||||||
|
debug_call_data["final_response_size"] = len(result_json)
|
||||||
|
_debug.log_call("web_search_tool", debug_call_data)
|
||||||
|
_debug.save()
|
||||||
|
return result_json
|
||||||
|
|
||||||
if backend == "tavily":
|
if backend == "tavily":
|
||||||
logger.info("Tavily search: '%s' (limit: %d)", query, limit)
|
logger.info("Tavily search: '%s' (limit: %d)", query, limit)
|
||||||
raw = _tavily_request("search", {
|
raw = _tavily_request("search", {
|
||||||
|
|
@ -1350,11 +1393,12 @@ async def web_extract_tool(
|
||||||
"include_images": False,
|
"include_images": False,
|
||||||
})
|
})
|
||||||
results = _normalize_tavily_documents(raw, fallback_url=safe_urls[0] if safe_urls else "")
|
results = _normalize_tavily_documents(raw, fallback_url=safe_urls[0] if safe_urls else "")
|
||||||
elif backend == "searxng":
|
elif backend in ("searxng", "brave-free", "ddgs"):
|
||||||
# SearXNG is search-only — it cannot extract URL content
|
# These backends are search-only — they cannot extract URL content
|
||||||
|
_label = {"searxng": "SearXNG", "brave-free": "Brave Search (free tier)", "ddgs": "DuckDuckGo (ddgs)"}[backend]
|
||||||
return json.dumps({
|
return json.dumps({
|
||||||
"success": False,
|
"success": False,
|
||||||
"error": "SearXNG is a search-only backend and cannot extract URL content. "
|
"error": f"{_label} is a search-only backend and cannot extract URL content. "
|
||||||
"Set web.extract_backend to firecrawl, tavily, exa, or parallel.",
|
"Set web.extract_backend to firecrawl, tavily, exa, or parallel.",
|
||||||
}, ensure_ascii=False)
|
}, ensure_ascii=False)
|
||||||
else:
|
else:
|
||||||
|
|
@ -1732,10 +1776,11 @@ async def web_crawl_tool(
|
||||||
_debug.save()
|
_debug.save()
|
||||||
return cleaned_result
|
return cleaned_result
|
||||||
|
|
||||||
# SearXNG is search-only — it cannot crawl
|
# SearXNG / Brave Search (free tier) / DuckDuckGo (ddgs) are search-only — they cannot crawl
|
||||||
if backend == "searxng":
|
if backend in ("searxng", "brave-free", "ddgs"):
|
||||||
|
_label = {"searxng": "SearXNG", "brave-free": "Brave Search (free tier)", "ddgs": "DuckDuckGo (ddgs)"}[backend]
|
||||||
return json.dumps({
|
return json.dumps({
|
||||||
"error": "SearXNG is a search-only backend and cannot crawl URLs. "
|
"error": f"{_label} is a search-only backend and cannot crawl URLs. "
|
||||||
"Set FIRECRAWL_API_KEY for crawling, or use web_search instead.",
|
"Set FIRECRAWL_API_KEY for crawling, or use web_search instead.",
|
||||||
"success": False,
|
"success": False,
|
||||||
}, ensure_ascii=False)
|
}, ensure_ascii=False)
|
||||||
|
|
@ -2035,9 +2080,12 @@ def check_firecrawl_api_key() -> bool:
|
||||||
def check_web_api_key() -> bool:
|
def check_web_api_key() -> bool:
|
||||||
"""Check whether the configured web backend is available."""
|
"""Check whether the configured web backend is available."""
|
||||||
configured = _load_web_config().get("backend", "").lower().strip()
|
configured = _load_web_config().get("backend", "").lower().strip()
|
||||||
if configured in ("exa", "parallel", "firecrawl", "tavily", "searxng"):
|
if configured in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs"):
|
||||||
return _is_backend_available(configured)
|
return _is_backend_available(configured)
|
||||||
return any(_is_backend_available(backend) for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng"))
|
return any(
|
||||||
|
_is_backend_available(backend)
|
||||||
|
for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def check_auxiliary_model() -> bool:
|
def check_auxiliary_model() -> bool:
|
||||||
|
|
@ -2074,6 +2122,10 @@ if __name__ == "__main__":
|
||||||
print(" Using Tavily API (https://tavily.com)")
|
print(" Using Tavily API (https://tavily.com)")
|
||||||
elif backend == "searxng":
|
elif backend == "searxng":
|
||||||
print(f" Using SearXNG (search only): {os.getenv('SEARXNG_URL', '').strip()}")
|
print(f" Using SearXNG (search only): {os.getenv('SEARXNG_URL', '').strip()}")
|
||||||
|
elif backend == "brave-free":
|
||||||
|
print(" Using Brave Search free tier (search only)")
|
||||||
|
elif backend == "ddgs":
|
||||||
|
print(" Using DuckDuckGo via ddgs package (search only)")
|
||||||
else:
|
else:
|
||||||
if firecrawl_url_available:
|
if firecrawl_url_available:
|
||||||
print(f" Using self-hosted Firecrawl: {os.getenv('FIRECRAWL_API_URL').strip().rstrip('/')}")
|
print(f" Using self-hosted Firecrawl: {os.getenv('FIRECRAWL_API_URL').strip().rstrip('/')}")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue