""" Web Search Provider Registry ============================ Central map of registered web providers. Populated by plugins at import-time via :meth:`PluginContext.register_web_search_provider`; consumed by the ``web_search`` and ``web_extract`` tool wrappers in :mod:`tools.web_tools` to dispatch each call to the active backend. Active selection ---------------- The active provider is chosen by configuration with this precedence: 1. ``web.search_backend`` (for search) or ``web.extract_backend`` (for extract) 2. ``web.backend`` (shared fallback) 3. If exactly one capability-eligible provider is registered, use it. 4. Legacy preference order (``brave-free`` → ``firecrawl`` → ``searxng`` → ``ddgs``) so installs that omitted the config key keep working. 5. Otherwise ``None`` — the tool surfaces a helpful error pointing at ``hermes tools``. The capability filter (``supports_search`` vs ``supports_extract``) is applied at every step so a search-only provider (``brave-free``) configured as ``web.extract_backend`` correctly falls through. """ from __future__ import annotations import logging import threading from typing import Dict, List, Optional from agent.web_search_provider import WebSearchProvider logger = logging.getLogger(__name__) _providers: Dict[str, WebSearchProvider] = {} _lock = threading.Lock() def register_provider(provider: WebSearchProvider) -> None: """Register a web search/extract provider. Re-registration (same ``name``) overwrites the previous entry and logs a debug message — makes hot-reload scenarios (tests, dev loops) behave predictably. 
""" if not isinstance(provider, WebSearchProvider): raise TypeError( f"register_provider() expects a WebSearchProvider instance, " f"got {type(provider).__name__}" ) name = provider.name if not isinstance(name, str) or not name.strip(): raise ValueError("Web provider .name must be a non-empty string") with _lock: existing = _providers.get(name) _providers[name] = provider if existing is not None: logger.debug( "Web provider '%s' re-registered (was %r)", name, type(existing).__name__, ) else: logger.debug( "Registered web provider '%s' (%s)", name, type(provider).__name__, ) def list_providers() -> List[WebSearchProvider]: """Return all registered providers, sorted by name.""" with _lock: items = list(_providers.values()) return sorted(items, key=lambda p: p.name) def get_provider(name: str) -> Optional[WebSearchProvider]: """Return the provider registered under *name*, or None.""" if not isinstance(name, str): return None with _lock: return _providers.get(name.strip()) # --------------------------------------------------------------------------- # Active-provider resolution # --------------------------------------------------------------------------- def _read_config_key(*path: str) -> Optional[str]: """Resolve a dotted config key from ``config.yaml``. Returns None on miss.""" try: from hermes_cli.config import load_config cfg = load_config() cur = cfg for segment in path: if not isinstance(cur, dict): return None cur = cur.get(segment) if isinstance(cur, str) and cur.strip(): return cur.strip() except Exception as exc: logger.debug("Could not read config %s: %s", ".".join(path), exc) return None # Legacy preference order — preserves behaviour for users who set no config # at all. brave-free first because it was the shipped default after the # Brave migration; firecrawl second for back-compat with older configs. 
_LEGACY_PREFERENCE = ("brave-free", "firecrawl", "searxng", "ddgs")


def _resolve(configured: Optional[str], *, capability: str) -> Optional[WebSearchProvider]:
    """Pick the provider that should serve *capability*.

    Args:
        configured: Provider name taken from configuration, or ``None`` when
            nothing was configured.
        capability: Either ``"search"`` or ``"extract"``; any other value
            makes every provider ineligible.

    Returns:
        The configured provider if it is registered and supports the
        capability; otherwise the sole eligible provider, if there is exactly
        one; otherwise the first eligible entry of ``_LEGACY_PREFERENCE``;
        otherwise ``None``.
    """
    # Work on a private copy so the lock is not held while calling into
    # provider code.
    with _lock:
        registry = dict(_providers)

    def _supports(candidate: WebSearchProvider) -> bool:
        if capability == "search":
            return bool(candidate.supports_search())
        if capability == "extract":
            return bool(candidate.supports_extract())
        return False

    # Step 1: explicit configuration wins when it is usable.
    if configured:
        chosen = registry.get(configured)
        if chosen is None:
            logger.debug(
                "web backend '%s' configured but not registered; falling back",
                configured,
            )
        elif _supports(chosen):
            return chosen
        else:
            logger.debug(
                "web backend '%s' configured but does not support '%s'; falling back",
                configured,
                capability,
            )

    # Step 2: an unambiguous single candidate needs no configuration.
    capable = list(filter(_supports, registry.values()))
    if len(capable) == 1:
        return capable[0]

    # Step 3: walk the legacy preference list and take the first eligible hit.
    legacy_hits = (
        candidate
        for candidate in map(registry.get, _LEGACY_PREFERENCE)
        if candidate is not None and _supports(candidate)
    )
    return next(legacy_hits, None)


def get_active_search_provider() -> Optional[WebSearchProvider]:
    """Return the provider that currently serves web search, or ``None``.

    ``web.search_backend`` is consulted first and ``web.backend`` only when
    the former yields nothing; the result is then passed to :func:`_resolve`
    with the ``"search"`` capability.
    """
    backend = _read_config_key("web", "search_backend")
    if not backend:
        backend = _read_config_key("web", "backend")
    return _resolve(backend, capability="search")


def get_active_extract_provider() -> Optional[WebSearchProvider]:
    """Return the provider that currently serves web extract, or ``None``.

    ``web.extract_backend`` is consulted first and ``web.backend`` only when
    the former yields nothing; the result is then passed to :func:`_resolve`
    with the ``"extract"`` capability.
    """
    backend = _read_config_key("web", "extract_backend")
    if not backend:
        backend = _read_config_key("web", "backend")
    return _resolve(backend, capability="extract")


def _reset_for_tests() -> None:
    """Empty the provider registry. **Test-only.**"""
    with _lock:
        _providers.clear()