"""
Web Search Provider Registry
============================

Central map of registered web providers. Populated by plugins at import-time
via :meth:`PluginContext.register_web_search_provider`; consumed by the
``web_search`` and ``web_extract`` tool wrappers in :mod:`tools.web_tools` to
dispatch each call to the active backend.

Active selection
----------------
The active provider is chosen by configuration with this precedence:

1. ``web.search_backend`` (for search) or ``web.extract_backend`` (for extract)
2. ``web.backend`` (shared fallback)
3. If exactly one capability-eligible provider is registered, use it.
4. Legacy preference order (``brave-free`` → ``firecrawl`` → ``searxng`` → ``ddgs``)
   so installs that omitted the config key keep working.
5. Otherwise ``None`` — the tool surfaces a helpful error pointing at
   ``hermes tools``.

The capability filter (``supports_search`` vs ``supports_extract``) is applied
at every step so a search-only provider (``brave-free``) configured as
``web.extract_backend`` correctly falls through.
"""

from __future__ import annotations

import logging
import threading
from typing import Dict, List, Optional

from agent.web_search_provider import WebSearchProvider

logger = logging.getLogger(__name__)


# Registry keyed by (whitespace-stripped) provider name. Every read and write
# in this module happens while holding ``_lock``.
_providers: Dict[str, WebSearchProvider] = {}
_lock = threading.Lock()


def register_provider(provider: WebSearchProvider) -> None:
    """Register a web search/extract provider.

    The provider is stored under ``provider.name`` with surrounding
    whitespace removed, which is the same normalisation
    :func:`get_provider` applies to its lookup key and
    :func:`_read_config_key` applies to configured backend names.

    Re-registration (same ``name``) overwrites the previous entry and logs
    a debug message — makes hot-reload scenarios (tests, dev loops) behave
    predictably.

    Raises:
        TypeError: *provider* is not a :class:`WebSearchProvider` instance.
        ValueError: ``provider.name`` is not a string, or is empty/blank.
    """
    if not isinstance(provider, WebSearchProvider):
        raise TypeError(
            f"register_provider() expects a WebSearchProvider instance, "
            f"got {type(provider).__name__}"
        )
    name = provider.name
    if not isinstance(name, str) or not name.strip():
        raise ValueError("Web provider .name must be a non-empty string")
    # Store under the stripped name: lookups strip their key, so a padded
    # key would be registered but unreachable.
    key = name.strip()
    with _lock:
        existing = _providers.get(key)
        _providers[key] = provider
    if existing is not None:
        logger.debug(
            "Web provider '%s' re-registered (was %r)",
            key, type(existing).__name__,
        )
    else:
        logger.debug(
            "Registered web provider '%s' (%s)",
            key, type(provider).__name__,
        )


def list_providers() -> List[WebSearchProvider]:
    """Return all registered providers, sorted by name."""
    # Copy under the lock, sort outside it.
    with _lock:
        items = list(_providers.values())
    return sorted(items, key=lambda p: p.name)


def get_provider(name: str) -> Optional[WebSearchProvider]:
    """Return the provider registered under *name*, or None.

    *name* is stripped of surrounding whitespace before the lookup; a
    non-string *name* yields ``None``.
    """
    if not isinstance(name, str):
        return None
    with _lock:
        return _providers.get(name.strip())


# ---------------------------------------------------------------------------
# Active-provider resolution
# ---------------------------------------------------------------------------


def _read_config_key(*path: str) -> Optional[str]:
    """Resolve a dotted config key from ``config.yaml``. Returns None on miss.

    Walks the mapping returned by ``hermes_cli.config.load_config()`` one
    *path* segment at a time. The result is the stripped string found at the
    end of the path; ``None`` is returned when an intermediate value is not a
    dict, when the final value is not a non-blank string, or when loading the
    config raises (the error is logged at debug level, not propagated).
    """
    try:
        # Imported at call time rather than at module import.
        from hermes_cli.config import load_config

        cfg = load_config()
        cur = cfg
        for segment in path:
            if not isinstance(cur, dict):
                return None
            cur = cur.get(segment)
        if isinstance(cur, str) and cur.strip():
            return cur.strip()
    except Exception as exc:
        # Best-effort read: an unreadable config is treated as "key not set".
        logger.debug("Could not read config %s: %s", ".".join(path), exc)
    return None


# Legacy preference order — preserves behaviour for users who set no config
# at all.  brave-free first because it was the shipped default after the
# Brave migration; firecrawl second for back-compat with older configs.
_LEGACY_PREFERENCE = ("brave-free", "firecrawl", "searxng", "ddgs")


def _resolve(
    configured: Optional[str],
    *,
    capability: str,
    fallback: Optional[str] = None,
) -> Optional[WebSearchProvider]:
    """Resolve the active provider for a capability ("search" | "extract").

    Resolution order, with the capability filter applied at every step:

    1. *configured* — the capability-specific backend name, if any.
    2. *fallback* — the shared backend name, if any. It is consulted
       whenever step 1 yields nothing usable, including when *configured*
       names a provider that is unregistered or lacks the capability.
    3. The sole capable provider, when exactly one is registered.
    4. The first capable provider in ``_LEGACY_PREFERENCE``.

    Args:
        configured: Preferred backend name, or ``None``/empty when unset.
        capability: ``"search"`` or ``"extract"``. Any other value matches
            no provider, so the result is ``None``.
        fallback: Secondary backend name tried after *configured*. Defaults
            to ``None``, which skips step 2.

    Returns:
        The selected provider, or ``None`` when no registered provider
        supports *capability* under the rules above.
    """
    # Work on a copy so provider capability methods run without the lock held.
    with _lock:
        snapshot = dict(_providers)

    def _capable(p: WebSearchProvider) -> bool:
        if capability == "search":
            return bool(p.supports_search())
        if capability == "extract":
            return bool(p.supports_extract())
        return False

    tried = set()
    for candidate in (configured, fallback):
        # Skip unset names, and skip the shared key when it merely repeats
        # the specific one (avoids a duplicate lookup and log line).
        if not candidate or candidate in tried:
            continue
        tried.add(candidate)
        provider = snapshot.get(candidate)
        if provider is None:
            logger.debug(
                "web backend '%s' configured but not registered; falling back",
                candidate,
            )
        elif _capable(provider):
            return provider
        else:
            logger.debug(
                "web backend '%s' configured but does not support '%s'; falling back",
                candidate, capability,
            )

    eligible = [p for p in snapshot.values() if _capable(p)]
    if len(eligible) == 1:
        return eligible[0]

    for legacy in _LEGACY_PREFERENCE:
        provider = snapshot.get(legacy)
        if provider is not None and _capable(provider):
            return provider

    return None


def get_active_search_provider() -> Optional[WebSearchProvider]:
    """Resolve the currently-active web search provider.

    Reads ``web.search_backend`` (preferred) and ``web.backend`` (shared
    fallback) via ``_read_config_key`` and hands both to :func:`_resolve`,
    so the shared key is still honoured when the search-specific key names
    a provider that is unregistered or cannot search.
    """
    return _resolve(
        _read_config_key("web", "search_backend"),
        capability="search",
        fallback=_read_config_key("web", "backend"),
    )


def get_active_extract_provider() -> Optional[WebSearchProvider]:
    """Resolve the currently-active web extract provider.

    Reads ``web.extract_backend`` (preferred) and ``web.backend`` (shared
    fallback) via ``_read_config_key`` and hands both to :func:`_resolve`,
    so the shared key is still honoured when the extract-specific key names
    a provider that is unregistered or cannot extract.
    """
    return _resolve(
        _read_config_key("web", "extract_backend"),
        capability="extract",
        fallback=_read_config_key("web", "backend"),
    )


def _reset_for_tests() -> None:
    """Clear the registry. **Test-only.**"""
    with _lock:
        _providers.clear()