diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 76bb3f07af..cf2b0b528a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1832,6 +1832,14 @@ OPTIONAL_ENV_VARS = { "password": True, "category": "tool", }, + "SEARXNG_URL": { + "description": "URL of your SearXNG instance for free self-hosted web search", + "prompt": "SearXNG URL (e.g. http://localhost:8080)", + "url": "https://searxng.github.io/searxng/", + "tools": ["web_search"], + "password": False, + "category": "tool", + }, "BROWSERBASE_API_KEY": { "description": "Browserbase API key for cloud browser (optional — local browser works without this)", "prompt": "Browserbase API key", diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index c83844901f..be027e85cd 100644 --- a/hermes_cli/nous_subscription.py +++ b/hermes_cli/nous_subscription.py @@ -255,6 +255,10 @@ def get_nous_subscription_features( terminal_cfg = config.get("terminal") if isinstance(config.get("terminal"), dict) else {} web_backend = str(web_cfg.get("backend") or "").strip().lower() + # Per-capability overrides: if set, they determine which backend is active for + # search/extract independently of web.backend. 
+ web_search_backend = str(web_cfg.get("search_backend") or "").strip().lower() + web_extract_backend = str(web_cfg.get("extract_backend") or "").strip().lower() tts_provider = str(tts_cfg.get("provider") or "edge").strip().lower() browser_provider_explicit = "cloud_provider" in browser_cfg browser_provider = normalize_browser_cloud_provider( @@ -280,6 +284,7 @@ def get_nous_subscription_features( direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL")) direct_parallel = bool(get_env_value("PARALLEL_API_KEY")) direct_tavily = bool(get_env_value("TAVILY_API_KEY")) + direct_searxng = bool(get_env_value("SEARXNG_URL")) direct_fal = fal_key_is_configured() direct_openai_tts = bool(resolve_openai_audio_api_key()) direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY")) @@ -323,10 +328,18 @@ def get_nous_subscription_features( or (web_backend == "firecrawl" and direct_firecrawl) or (web_backend == "parallel" and direct_parallel) or (web_backend == "tavily" and direct_tavily) + or (web_backend == "searxng" and direct_searxng) + # Per-capability overrides: search_backend or extract_backend may be set + # without web.backend (using the new split config from #20061) + or (web_search_backend == "searxng" and direct_searxng) + or (web_search_backend == "exa" and direct_exa) + or (web_search_backend == "firecrawl" and direct_firecrawl) + or (web_search_backend == "parallel" and direct_parallel) + or (web_search_backend == "tavily" and direct_tavily) ) ) web_available = bool( - managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily + managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily or direct_searxng ) image_managed = image_tool_enabled and managed_image_available and not direct_fal @@ -412,8 +425,8 @@ def get_nous_subscription_features( managed_by_nous=web_managed, direct_override=web_active and not web_managed, toolset_enabled=web_tool_enabled, - 
current_provider=web_backend or "", - explicit_configured=bool(web_backend), + current_provider=web_backend or web_search_backend or "", + explicit_configured=bool(web_backend or web_search_backend), ), "image_gen": NousFeatureState( key="image_gen", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 19e9366a20..e82bdafdfa 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -394,7 +394,7 @@ def _print_setup_summary(config: dict, hermes_home): label = f"Web Search & Extract ({subscription_features.web.current_provider})" tool_status.append((label, True, None)) else: - tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY/FIRECRAWL_API_URL, or TAVILY_API_KEY")) + tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY/FIRECRAWL_API_URL, TAVILY_API_KEY, or SEARXNG_URL")) # Browser tools (local Chromium, Camofox, Browserbase, Browser Use, or Firecrawl) browser_provider = subscription_features.browser.current_provider diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 14d82caa65..b258e15998 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -299,6 +299,15 @@ TOOL_CATEGORIES = { {"key": "FIRECRAWL_API_URL", "prompt": "Your Firecrawl instance URL (e.g., http://localhost:3002)"}, ], }, + { + "name": "SearXNG", + "badge": "free · self-hosted · search only", + "tag": "Privacy-respecting metasearch engine — search only (pair with any extract provider)", + "web_backend": "searxng", + "env_vars": [ + {"key": "SEARXNG_URL", "prompt": "Your SearXNG instance URL (e.g., http://localhost:8080)", "url": "https://searxng.github.io/searxng/"}, + ], + }, ], }, "image_gen": { diff --git a/tests/tools/test_web_providers_searxng.py b/tests/tools/test_web_providers_searxng.py new file mode 100644 index 0000000000..4779ed6ce6 --- /dev/null +++ b/tests/tools/test_web_providers_searxng.py @@ -0,0 +1,337 @@ +"""Tests for the SearXNG web 
search provider.
+
+Covers:
+- SearXNGSearchProvider.is_configured() env var gating
+- SearXNGSearchProvider.search() — happy path, HTTP error, request error, missing URL
+- Result normalization (title, url, description, position)
+- Score-based sorting and limit truncation
+- _is_backend_available("searxng") integration
+- _get_backend() recognizes "searxng" as a valid configured backend
+- check_web_api_key() includes searxng in availability check
+"""
+from __future__ import annotations
+
+import json
+import os
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# SearXNGSearchProvider unit tests
+# ---------------------------------------------------------------------------
+
+
+class TestSearXNGSearchProviderIsConfigured:
+    def test_configured_when_url_set(self, monkeypatch):
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        assert SearXNGSearchProvider().is_configured() is True
+
+    def test_not_configured_when_url_missing(self, monkeypatch):
+        monkeypatch.delenv("SEARXNG_URL", raising=False)
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        assert SearXNGSearchProvider().is_configured() is False
+
+    def test_not_configured_when_url_empty_string(self, monkeypatch):
+        monkeypatch.setenv("SEARXNG_URL", " ")
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        assert SearXNGSearchProvider().is_configured() is False
+
+    def test_provider_name(self):
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        assert SearXNGSearchProvider().provider_name() == "searxng"
+
+    def test_implements_web_search_provider(self):
+        from tools.web_providers.base import WebSearchProvider
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        assert issubclass(SearXNGSearchProvider, WebSearchProvider)
+
+
+class TestSearXNGSearchProviderSearch:
+    """Happy path and error handling for SearXNGSearchProvider.search()."""
+
+    _SAMPLE_RESPONSE = {
+        "results": [
+            {"title": "Result A", "url": "https://a.example.com", "content": "Desc A", "score": 0.9},
+            {"title": "Result B", "url": "https://b.example.com", "content": "Desc B", "score": 0.7},
+            {"title": "Result C", "url": "https://c.example.com", "content": "Desc C", "score": 0.5},
+        ]
+    }
+
+    def _make_mock_response(self, json_data, status_code=200):
+        mock_resp = MagicMock()
+        mock_resp.status_code = status_code
+        mock_resp.json.return_value = json_data
+        mock_resp.raise_for_status = MagicMock()
+        return mock_resp
+
+    def test_happy_path_returns_normalized_results(self, monkeypatch):
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        mock_resp = self._make_mock_response(self._SAMPLE_RESPONSE)
+
+        with patch("httpx.get", return_value=mock_resp):
+            result = SearXNGSearchProvider().search("test query", limit=5)
+
+        assert result["success"] is True
+        web = result["data"]["web"]
+        assert len(web) == 3
+        assert web[0]["title"] == "Result A"
+        assert web[0]["url"] == "https://a.example.com"
+        assert web[0]["description"] == "Desc A"
+        assert web[0]["position"] == 1
+
+    def test_results_sorted_by_score_descending(self, monkeypatch):
+        """Results should be sorted by score before limit is applied."""
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        unordered = {
+            "results": [
+                {"title": "Low", "url": "https://low.example.com", "content": "", "score": 0.1},
+                {"title": "High", "url": "https://high.example.com", "content": "", "score": 0.99},
+                {"title": "Mid", "url": "https://mid.example.com", "content": "", "score": 0.5},
+            ]
+        }
+        mock_resp = self._make_mock_response(unordered)
+
+        with patch("httpx.get", return_value=mock_resp):
+            result = SearXNGSearchProvider().search("query", limit=5)
+
+        assert result["success"] is True
+        assert result["data"]["web"][0]["title"] == "High"
+        assert result["data"]["web"][1]["title"] == "Mid"
+        assert result["data"]["web"][2]["title"] == "Low"
+
+    def test_limit_is_respected(self, monkeypatch):
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        mock_resp = self._make_mock_response(self._SAMPLE_RESPONSE)
+
+        with patch("httpx.get", return_value=mock_resp):
+            result = SearXNGSearchProvider().search("query", limit=2)
+
+        assert result["success"] is True
+        assert len(result["data"]["web"]) == 2
+
+    def test_position_is_one_indexed(self, monkeypatch):
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        mock_resp = self._make_mock_response(self._SAMPLE_RESPONSE)
+
+        with patch("httpx.get", return_value=mock_resp):
+            result = SearXNGSearchProvider().search("query", limit=5)
+
+        positions = [r["position"] for r in result["data"]["web"]]
+        assert positions == [1, 2, 3]
+
+    def test_empty_results(self, monkeypatch):
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        mock_resp = self._make_mock_response({"results": []})
+
+        with patch("httpx.get", return_value=mock_resp):
+            result = SearXNGSearchProvider().search("nothing", limit=5)
+
+        assert result["success"] is True
+        assert result["data"]["web"] == []
+
+    def test_missing_score_falls_back_to_zero(self, monkeypatch):
+        """Results without a score field should sort to the bottom."""
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        data = {
+            "results": [
+                {"title": "No score", "url": "https://noscore.example.com", "content": ""},
+                {"title": "Has score", "url": "https://scored.example.com", "content": "", "score": 0.8},
+            ]
+        }
+        mock_resp = self._make_mock_response(data)
+
+        with patch("httpx.get", return_value=mock_resp):
+            result = SearXNGSearchProvider().search("query", limit=5)
+
+        assert result["success"] is True
+        # Has score should sort first (0.8 > 0)
+        assert result["data"]["web"][0]["title"] == "Has score"
+
+    def test_http_error_returns_failure(self, monkeypatch):
+        import httpx
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        from tools.web_providers.searxng import SearXNGSearchProvider
+
+        mock_resp = MagicMock()
+        mock_resp.status_code = 500
+        http_err = httpx.HTTPStatusError("500", request=MagicMock(), response=mock_resp)
+
+        with patch("httpx.get", side_effect=http_err):
+            result = SearXNGSearchProvider().search("query", limit=5)
+
+        assert result["success"] is False
+        assert "500" in result["error"]
+
+    def test_request_error_returns_failure(self, monkeypatch):
+        import httpx
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        from tools.web_providers.searxng import SearXNGSearchProvider
+
+        with patch("httpx.get", side_effect=httpx.RequestError("connection refused")):
+            result = SearXNGSearchProvider().search("query", limit=5)
+
+        assert result["success"] is False
+        assert "localhost:8080" in result["error"] or "connection" in result["error"].lower()
+
+    def test_missing_url_returns_failure(self, monkeypatch):
+        monkeypatch.delenv("SEARXNG_URL", raising=False)
+        from tools.web_providers.searxng import SearXNGSearchProvider
+
+        result = SearXNGSearchProvider().search("query", limit=5)
+        assert result["success"] is False
+        assert "SEARXNG_URL" in result["error"]
+
+    def test_trailing_slash_stripped_from_url(self, monkeypatch):
+        """Base URL trailing slash should not produce double-slash in endpoint."""
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080/")
+        from tools.web_providers.searxng import SearXNGSearchProvider
+        mock_resp = self._make_mock_response({"results": []})
+
+        calls = []
+        def capture_get(url, **kwargs):
+            calls.append(url)
+            return mock_resp
+
+        with patch("httpx.get", side_effect=capture_get):
+            SearXNGSearchProvider().search("query", limit=5)
+
+        assert calls[0] == "http://localhost:8080/search", f"Got: {calls[0]}"
+
+
+# ---------------------------------------------------------------------------
+# Integration: _is_backend_available recognizes "searxng"
+# ---------------------------------------------------------------------------
+
+
+class TestIsBackendAvailable:
+    def test_searxng_available_when_url_set(self, monkeypatch):
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        from tools.web_tools import _is_backend_available
+        assert _is_backend_available("searxng") is True
+
+    def test_searxng_unavailable_when_url_missing(self, monkeypatch):
+        monkeypatch.delenv("SEARXNG_URL", raising=False)
+        from tools.web_tools import _is_backend_available
+        assert _is_backend_available("searxng") is False
+
+    def test_unknown_backend_still_false(self):
+        from tools.web_tools import _is_backend_available
+        assert _is_backend_available("unknownbackend") is False
+
+
+# ---------------------------------------------------------------------------
+# Integration: _get_backend() accepts "searxng" as configured value
+# ---------------------------------------------------------------------------
+
+
+class TestGetBackendSearXNG:
+    def test_configured_searxng_returns_searxng(self, monkeypatch):
+        from tools import web_tools
+        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "searxng"})
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        assert web_tools._get_backend() == "searxng"
+
+    def test_auto_detect_picks_searxng_when_only_url_set(self, monkeypatch):
+        """When no backend is configured but SEARXNG_URL is set, auto-detect returns it."""
+        from tools import web_tools
+        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
+        monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False)
+        monkeypatch.delenv("FIRECRAWL_API_URL", raising=False)
+        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
+        monkeypatch.delenv("TAVILY_API_KEY", raising=False)
+        monkeypatch.delenv("EXA_API_KEY", raising=False)
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        # Suppress tool gateway
+        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
+        assert web_tools._get_backend() == "searxng"
+
+    def test_searxng_does_not_override_higher_priority_provider(self, monkeypatch):
+        """Tavily (higher priority than searxng) should win in auto-detect."""
+        from tools import web_tools
+        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
+        monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False)
+        monkeypatch.delenv("FIRECRAWL_API_URL", raising=False)
+        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
+        monkeypatch.setenv("TAVILY_API_KEY", "tvly-key")
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
+        assert web_tools._get_backend() == "tavily"
+
+
+# ---------------------------------------------------------------------------
+# Integration: check_web_api_key includes searxng
+# ---------------------------------------------------------------------------
+
+
+class TestCheckWebApiKey:
+    def test_searxng_satisfies_check_web_api_key(self, monkeypatch):
+        from tools import web_tools
+        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "searxng"})
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        assert web_tools.check_web_api_key() is True
+
+    def test_no_credentials_fails(self, monkeypatch):
+        from tools import web_tools
+        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
+        monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False)
+        monkeypatch.delenv("FIRECRAWL_API_URL", raising=False)
+        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
+        monkeypatch.delenv("TAVILY_API_KEY", raising=False)
+        monkeypatch.delenv("EXA_API_KEY", raising=False)
+        monkeypatch.delenv("SEARXNG_URL", raising=False)
+        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
+        monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)
+        assert web_tools.check_web_api_key() is False
+
+
+# ---------------------------------------------------------------------------
+# searxng-only: web_extract and web_crawl return clear errors
+# ---------------------------------------------------------------------------
+
+
+class TestSearXNGOnlyExtractCrawlErrors:
+    """When searxng is the active backend, extract/crawl must return clear errors."""
+
+    def test_web_crawl_searxng_returns_clear_error(self, monkeypatch):
+        import asyncio
+        from tools import web_tools
+
+        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "searxng"})
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
+        monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)
+        monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False)
+
+        # asyncio.run() owns the loop lifecycle; get_event_loop() is deprecated with no running loop.
+        result_str = asyncio.run(
+            web_tools.web_crawl_tool("https://example.com")
+        )
+        result = json.loads(result_str)
+        assert result["success"] is False
+        assert "search-only" in result["error"].lower() or "SearXNG" in result["error"]
+
+    def test_web_extract_searxng_returns_clear_error(self, monkeypatch):
+        import asyncio
+        from tools import web_tools
+
+        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "searxng"})
+        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
+        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
+        monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False)
+
+        # asyncio.run() owns the loop lifecycle; get_event_loop() is deprecated with no running loop.
+        result_str = asyncio.run(
+            web_tools.web_extract_tool(["https://example.com"])
+        )
+        result = json.loads(result_str)
+        assert result["success"] is False
+        assert "search-only" in
result["error"].lower() or "SearXNG" in result["error"]
diff --git a/tools/web_providers/searxng.py b/tools/web_providers/searxng.py
new file mode 100644
index 0000000000..59ddcb8d51
--- /dev/null
+++ b/tools/web_providers/searxng.py
@@ -0,0 +1,160 @@
+"""SearXNG web search provider.
+
+SearXNG is a free, self-hosted, privacy-respecting metasearch engine.
+It implements ``WebSearchProvider`` only — there is no extract capability.
+
+Configuration::
+
+    # ~/.hermes/config.yaml (SEARXNG_URL is a URL, not a secret — use config.yaml not .env)
+    SEARXNG_URL: http://localhost:8080
+
+    # Use SearXNG for search, pair with any extract provider:
+    web:
+      search_backend: "searxng"
+      extract_backend: "firecrawl"
+
+This module itself reads ``SEARXNG_URL`` with ``os.getenv``, so the value must
+be present in the process environment by the time ``search()`` is called.
+
+Public SearXNG instances are listed at https://searx.space/ but self-hosting
+is recommended for production use (rate limits and availability vary per
+public instance).
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any, Dict
+
+from tools.web_providers.base import WebSearchProvider
+
+logger = logging.getLogger(__name__)
+
+
+def _as_text(value: Any) -> str:
+    """Return ``str(value)``, mapping ``None`` (JSON ``null``) to ``""``."""
+    return "" if value is None else str(value)
+
+
+def _result_score(result: Dict[str, Any]) -> float:
+    """Return the result's ``score`` as a float; ``0.0`` if absent or invalid."""
+    try:
+        return float(result.get("score") or 0)
+    except (TypeError, ValueError):
+        return 0.0
+
+
+class SearXNGSearchProvider(WebSearchProvider):
+    """Search via a SearXNG instance.
+
+    Requires ``SEARXNG_URL`` to be set (e.g. ``http://localhost:8080``).
+    No API key needed — SearXNG is open-source and self-hosted.
+
+    Uses the SearXNG JSON API (``/search?format=json``).  Results are
+    sorted by SearXNG's own score and truncated to *limit*.
+    """
+
+    def provider_name(self) -> str:
+        """Return the backend identifier, ``"searxng"``."""
+        return "searxng"
+
+    def is_configured(self) -> bool:
+        """Return True when ``SEARXNG_URL`` is set to a non-empty value."""
+        return bool(os.getenv("SEARXNG_URL", "").strip())
+
+    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
+        """Execute a search against the configured SearXNG instance.
+
+        Args:
+            query: Search terms, sent as the ``q`` request parameter.
+            limit: Maximum number of results to return.  Negative values
+                are treated as ``0``.
+
+        Returns normalized results::
+
+            {
+                "success": True,
+                "data": {
+                    "web": [
+                        {
+                            "title": str,
+                            "url": str,
+                            "description": str,
+                            "position": int,
+                        },
+                        ...
+                    ]
+                }
+            }
+
+        On failure returns ``{"success": False, "error": str}``.
+        """
+        import httpx
+
+        base_url = os.getenv("SEARXNG_URL", "").strip().rstrip("/")
+        if not base_url:
+            return {"success": False, "error": "SEARXNG_URL is not set"}
+
+        params: Dict[str, Any] = {
+            "q": query,
+            "format": "json",
+            "pageno": 1,
+        }
+
+        try:
+            resp = httpx.get(
+                f"{base_url}/search",
+                params=params,
+                timeout=15,
+                headers={"Accept": "application/json"},
+            )
+            resp.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            logger.warning("SearXNG HTTP error: %s", exc)
+            return {"success": False, "error": f"SearXNG returned HTTP {exc.response.status_code}"}
+        except httpx.RequestError as exc:
+            logger.warning("SearXNG request error: %s", exc)
+            return {"success": False, "error": f"Could not reach SearXNG at {base_url}: {exc}"}
+
+        try:
+            data = resp.json()
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("SearXNG response parse error: %s", exc)
+            return {"success": False, "error": "Could not parse SearXNG response as JSON"}
+
+        # Valid JSON is not necessarily an object; guard before calling .get().
+        if not isinstance(data, dict):
+            logger.warning("SearXNG response is not a JSON object: %s", type(data).__name__)
+            return {"success": False, "error": "Unexpected SearXNG response format"}
+
+        raw_results = data.get("results")
+        if not isinstance(raw_results, list):
+            raw_results = []
+
+        # Skip malformed entries so one bad row cannot fail the whole search.
+        candidates = [r for r in raw_results if isinstance(r, dict)]
+
+        # Sort by score, highest first, then cap to limit.  sorted() is stable,
+        # so equally-scored results keep the order SearXNG returned them in.
+        top_results = sorted(candidates, key=_result_score, reverse=True)[: max(limit, 0)]
+
+        web_results = [
+            {
+                "title": _as_text(r.get("title")),
+                "url": _as_text(r.get("url")),
+                "description": _as_text(r.get("content")),
+                "position": position,
+            }
+            for position, r in enumerate(top_results, start=1)
+        ]
+
+        logger.info(
+            "SearXNG search '%s': %d results (from %d raw, limit %d)",
+            query,
+            len(web_results),
+            len(raw_results),
+            limit,
+        )
+
+        return {"success": True, "data": {"web": web_results}}
diff --git a/tools/web_tools.py b/tools/web_tools.py
index b5eb111685..e3268ac381 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -126,7 +126,7 @@ def _get_backend() -> str:
     keys manually without running setup.
""" configured = (_load_web_config().get("backend") or "").lower().strip() - if configured in ("parallel", "firecrawl", "tavily", "exa"): + if configured in ("parallel", "firecrawl", "tavily", "exa", "searxng"): return configured # Fallback for manual / legacy config — pick the highest-priority @@ -137,6 +137,7 @@ def _get_backend() -> str: ("parallel", _has_env("PARALLEL_API_KEY")), ("tavily", _has_env("TAVILY_API_KEY")), ("exa", _has_env("EXA_API_KEY")), + ("searxng", _has_env("SEARXNG_URL")), ) for backend, available in backend_candidates: if available: @@ -193,6 +194,8 @@ def _is_backend_available(backend: str) -> bool: return check_firecrawl_api_key() if backend == "tavily": return _has_env("TAVILY_API_KEY") + if backend == "searxng": + return _has_env("SEARXNG_URL") return False # ─── Firecrawl Client ──────────────────────────────────────────────────────── @@ -1187,6 +1190,16 @@ def web_search_tool(query: str, limit: int = 5) -> str: _debug.save() return result_json + if backend == "searxng": + from tools.web_providers.searxng import SearXNGSearchProvider + response_data = SearXNGSearchProvider().search(query, limit) + debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", [])) + result_json = json.dumps(response_data, indent=2, ensure_ascii=False) + debug_call_data["final_response_size"] = len(result_json) + _debug.log_call("web_search_tool", debug_call_data) + _debug.save() + return result_json + if backend == "tavily": logger.info("Tavily search: '%s' (limit: %d)", query, limit) raw = _tavily_request("search", { @@ -1337,6 +1350,13 @@ async def web_extract_tool( "include_images": False, }) results = _normalize_tavily_documents(raw, fallback_url=safe_urls[0] if safe_urls else "") + elif backend == "searxng": + # SearXNG is search-only — it cannot extract URL content + return json.dumps({ + "success": False, + "error": "SearXNG is a search-only backend and cannot extract URL content. 
" + "Set web.extract_backend to firecrawl, tavily, exa, or parallel.", + }, ensure_ascii=False) else: # ── Firecrawl extraction ── # Determine requested formats for Firecrawl v2 @@ -1712,6 +1732,14 @@ async def web_crawl_tool( _debug.save() return cleaned_result + # SearXNG is search-only — it cannot crawl + if backend == "searxng": + return json.dumps({ + "error": "SearXNG is a search-only backend and cannot crawl URLs. " + "Set FIRECRAWL_API_KEY for crawling, or use web_search instead.", + "success": False, + }, ensure_ascii=False) + # web_crawl requires Firecrawl or the Firecrawl tool-gateway — Parallel has no crawl API if not check_firecrawl_api_key(): return json.dumps({ @@ -2007,9 +2035,9 @@ def check_firecrawl_api_key() -> bool: def check_web_api_key() -> bool: """Check whether the configured web backend is available.""" configured = _load_web_config().get("backend", "").lower().strip() - if configured in ("exa", "parallel", "firecrawl", "tavily"): + if configured in ("exa", "parallel", "firecrawl", "tavily", "searxng"): return _is_backend_available(configured) - return any(_is_backend_available(backend) for backend in ("exa", "parallel", "firecrawl", "tavily")) + return any(_is_backend_available(backend) for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng")) def check_auxiliary_model() -> bool: @@ -2044,6 +2072,8 @@ if __name__ == "__main__": print(" Using Parallel API (https://parallel.ai)") elif backend == "tavily": print(" Using Tavily API (https://tavily.com)") + elif backend == "searxng": + print(f" Using SearXNG (search only): {os.getenv('SEARXNG_URL', '').strip()}") else: if firecrawl_url_available: print(f" Using self-hosted Firecrawl: {os.getenv('FIRECRAWL_API_URL').strip().rstrip('/')}")