refactor(web): delete inline vendor helpers, re-export from plugins

Removes ~580 lines of dead code from tools/web_tools.py that were superseded by the plugin migration but kept around in the cutover commit to keep the diff focused. Replaces them with thin re-export shims so existing tests and external callers that reach for the legacy ``tools.web_tools.<name>`` paths continue to work transparently. Deleted from tools/web_tools.py -------------------------------- - Lazy Firecrawl SDK proxy (_load_firecrawl_cls, _FirecrawlProxy, _FIRECRAWL_CLS_CACHE, the Firecrawl singleton) - Firecrawl client section (_get_direct_firecrawl_config, _get_firecrawl_gateway_url, _is_tool_gateway_ready, _has_direct_firecrawl_config, _raise_web_backend_configuration_error, _firecrawl_backend_help_suffix, _get_firecrawl_client) - Parallel client section (_get_parallel_client, _get_async_parallel_client, _parallel_client, _async_parallel_client) - Tavily client section (_TAVILY_BASE_URL, _tavily_request, _normalize_tavily_search_results, _normalize_tavily_documents) - Generic SDK normalizers (_to_plain_object, _normalize_result_list, _extract_web_search_results, _extract_scrape_payload) - Exa client section (_get_exa_client, _exa_client, _exa_search, _exa_extract) - Parallel helpers (_parallel_search, _parallel_extract) - Duplicate inline check_firecrawl_api_key Net: tools/web_tools.py drops from 2227 → 1613 lines (-614 lines). Re-exports added at top of tools/web_tools.py --------------------------------------------- - From plugins.web.firecrawl.provider: Firecrawl, _FirecrawlProxy, _FIRECRAWL_CLS_CACHE, _load_firecrawl_cls, _get_direct_firecrawl_config, _get_firecrawl_gateway_url, _is_tool_gateway_ready, _has_direct_firecrawl_config, _firecrawl_backend_help_suffix, _raise_web_backend_configuration_error, _get_firecrawl_client, _to_plain_object, _normalize_result_list, _extract_web_search_results, _extract_scrape_payload, check_firecrawl_api_key - From plugins.web.tavily.provider: _tavily_request, _normalize_tavily_search_results, _normalize_tavily_documents - From plugins.web.parallel.provider: _get_parallel_client, _get_async_parallel_client - From plugins.web.exa.provider: _get_exa_client Plus retained module-level imports for backward-compat with tests: - httpx (tests patch tools.web_tools.httpx for tavily request mocking) - build_vendor_gateway_url, _read_nous_access_token, resolve_managed_tool_gateway, managed_nous_tools_enabled, prefers_gateway (tests patch tools.web_tools.<name>) Plugin indirection pattern (key technique) ------------------------------------------ For functions inside the firecrawl/parallel/exa plugins to honor unit-test patches that target ``tools.web_tools.<name>``, the plugin implementations now do ``import tools.web_tools as _wt`` at call time and read helper names through that module (``_wt._read_nous_access_token``, ``_wt.Firecrawl``, ``_wt.prefers_gateway``, etc.). This makes the existing test patches transparently reach the plugin code without any test changes. The cached client globals (_firecrawl_client, _firecrawl_client_config, _parallel_client, _async_parallel_client, _exa_client) also now live on tools.web_tools so existing test setup_method handlers that reset ``tools.web_tools._<vendor>_client = None`` between cases keep working. The plugins read/write the cache via getattr/setattr on the web_tools module. Verified -------- - 173/173 targeted web tests pass: test_web_providers.py, test_web_providers_brave_free.py, test_web_providers_ddgs.py, test_web_providers_searxng.py, test_web_tools_config.py, test_web_tools_tavily.py, test_website_policy.py, test_config_null_guard.py - Compile-clean (py_compile.compile passes) - All inline implementations now exist in exactly one place (plugins.web.<vendor>.provider) Follow-up clean-up ------------------ - Drop _WEB_PLUGIN_SKIPLIST + hardcoded TOOL_CATEGORIES["web"] rows (next commit) - Delete tools/web_providers/ directory entirely - Add tests/plugins/web/ coverage - Full tests/tools/ + tests/gateway/ regression sweep before promoting PR
2026-05-18 04:41:56 +00:00 · 2026-05-14 00:47:22 +05:30 · 2026-05-14 00:47:22 +05:30 · 748f3e016b
commit 748f3e016b
parent 5e54330e27
4 changed files with 181 additions and 608 deletions
--- a/plugins/web/exa/provider.py
+++ b/plugins/web/exa/provider.py
@ -40,14 +40,22 @@ _exa_client: Any = None
 def _get_exa_client() -> Any:
    """Lazy-import and cache an Exa SDK client.

-    Mirrors :func:`tools.web_tools._get_exa_client`. Raises ``ValueError``
-    when ``EXA_API_KEY`` is unset — the dispatcher catches that and
-    surfaces a typed error response.
+    Cache lives on :mod:`tools.web_tools` (as ``_exa_client``) so unit
+    tests that reset that name between cases keep working. Raises
+    ``ValueError`` when ``EXA_API_KEY`` is unset.
    """
-    global _exa_client
+    import tools.web_tools as _wt

-    if _exa_client is not None:
-        return _exa_client
+    cached = getattr(_wt, "_exa_client", None)
+    if cached is not None:
+        return cached
+
+    api_key = os.getenv("EXA_API_KEY")
+    if not api_key:
+        raise ValueError(
+            "EXA_API_KEY environment variable not set. "
+            "Get your API key at https://exa.ai"
+        )

    try:
        from tools.lazy_deps import ensure as _lazy_ensure
@ -60,22 +68,17 @@ def _get_exa_client() -> Any:

    from exa_py import Exa  # noqa: WPS433 — deliberately lazy

-    api_key = os.getenv("EXA_API_KEY")
-    if not api_key:
-        raise ValueError(
-            "EXA_API_KEY environment variable not set. "
-            "Get your API key at https://exa.ai"
-        )
-
-    _exa_client = Exa(api_key=api_key)
-    _exa_client.headers["x-exa-integration"] = "hermes-agent"
-    return _exa_client
+    client = Exa(api_key=api_key)
+    client.headers["x-exa-integration"] = "hermes-agent"
+    _wt._exa_client = client
+    return client


 def _reset_client_for_tests() -> None:
    """Drop the cached Exa client so tests can re-instantiate cleanly."""
-    global _exa_client
-    _exa_client = None
+    import tools.web_tools as _wt
+
+    _wt._exa_client = None


 class ExaWebSearchProvider(WebSearchProvider):
--- a/plugins/web/firecrawl/provider.py
+++ b/plugins/web/firecrawl/provider.py
@ -136,20 +136,24 @@ def _get_direct_firecrawl_config() -> Optional[tuple]:

 def _get_firecrawl_gateway_url() -> str:
    """Return the configured Firecrawl gateway URL."""
-    from tools.tool_backend_helpers import build_vendor_gateway_url
+    import tools.web_tools as _wt

-    return build_vendor_gateway_url("firecrawl")
+    return _wt.build_vendor_gateway_url("firecrawl")


 def _is_tool_gateway_ready() -> bool:
-    """Return True when gateway URL + Nous Subscriber token are available."""
-    from tools.managed_tool_gateway import (
-        read_nous_access_token,
-        resolve_managed_tool_gateway,
-    )
+    """Return True when gateway URL + Nous Subscriber token are available.

-    return resolve_managed_tool_gateway(
-        "firecrawl", token_reader=read_nous_access_token
+    Reads ``read_nous_access_token`` and ``resolve_managed_tool_gateway``
+    via :mod:`tools.web_tools` rather than direct imports, so unit tests
+    that ``patch("tools.web_tools._read_nous_access_token", ...)`` see
+    their patches honored. The names are re-exported on
+    :mod:`tools.web_tools` for exactly this reason.
+    """
+    import tools.web_tools as _wt
+
+    return _wt.resolve_managed_tool_gateway(
+        "firecrawl", token_reader=_wt._read_nous_access_token
    ) is not None


@ -169,9 +173,9 @@ def check_firecrawl_api_key() -> bool:

 def _firecrawl_backend_help_suffix() -> str:
    """Return optional managed-gateway guidance for Firecrawl help text."""
-    from tools.tool_backend_helpers import managed_nous_tools_enabled
+    import tools.web_tools as _wt

-    if not managed_nous_tools_enabled():
+    if not _wt.managed_nous_tools_enabled():
        return ""
    return (
        ", or use the Nous Tool Gateway via your subscription "
@ -181,14 +185,14 @@ def _firecrawl_backend_help_suffix() -> str:

 def _raise_web_backend_configuration_error() -> None:
    """Raise a clear error for unsupported web backend configuration."""
-    from tools.tool_backend_helpers import managed_nous_tools_enabled
+    import tools.web_tools as _wt

    message = (
        "Web tools are not configured. "
        "Set FIRECRAWL_API_KEY for cloud Firecrawl or set FIRECRAWL_API_URL "
        "for a self-hosted Firecrawl instance."
    )
-    if managed_nous_tools_enabled():
+    if _wt.managed_nous_tools_enabled():
        message += (
            " With your Nous subscription you can also use the Tool Gateway — "
            "run `hermes tools` and select Nous Subscription as the web provider."
@ -204,21 +208,24 @@ def _get_firecrawl_client() -> Any:
    direct Firecrawl takes precedence when explicitly configured.

    Raises ValueError when neither path is usable.
-    """
-    global _firecrawl_client, _firecrawl_client_config

-    from tools.managed_tool_gateway import (
-        read_nous_access_token,
-        resolve_managed_tool_gateway,
-    )
-    from tools.tool_backend_helpers import prefers_gateway
+    The cached client is stored on :mod:`tools.web_tools` (as
+    ``_firecrawl_client`` and ``_firecrawl_client_config``) rather than on
+    this plugin module so that unit tests that reset the cache via
+    ``tools.web_tools._firecrawl_client = None`` keep working. Helper
+    functions (``prefers_gateway``, ``resolve_managed_tool_gateway``,
+    ``_read_nous_access_token``, ``Firecrawl``) are also looked up via
+    :mod:`tools.web_tools` for the same reason — see
+    :func:`_is_tool_gateway_ready`.
+    """
+    import tools.web_tools as _wt

    direct_config = _get_direct_firecrawl_config()
-    if direct_config is not None and not prefers_gateway("web"):
+    if direct_config is not None and not _wt.prefers_gateway("web"):
        kwargs, client_config = direct_config
    else:
-        managed_gateway = resolve_managed_tool_gateway(
-            "firecrawl", token_reader=read_nous_access_token
+        managed_gateway = _wt.resolve_managed_tool_gateway(
+            "firecrawl", token_reader=_wt._read_nous_access_token
        )
        if managed_gateway is None:
            logger.error(
@ -237,12 +244,16 @@ def _get_firecrawl_client() -> Any:
            managed_gateway.nous_user_token,
        )

-    if _firecrawl_client is not None and _firecrawl_client_config == client_config:
-        return _firecrawl_client
+    cached = getattr(_wt, "_firecrawl_client", None)
+    cached_config = getattr(_wt, "_firecrawl_client_config", None)
+    if cached is not None and cached_config == client_config:
+        return cached

-    _firecrawl_client = Firecrawl(**kwargs)
-    _firecrawl_client_config = client_config
-    return _firecrawl_client
+    # Construct via the re-exported Firecrawl proxy on tools.web_tools so
+    # unit tests patching ``tools.web_tools.Firecrawl`` see their mock.
+    _wt._firecrawl_client = _wt.Firecrawl(**kwargs)
+    _wt._firecrawl_client_config = client_config
+    return _wt._firecrawl_client


 def _reset_client_for_tests() -> None:
--- a/plugins/web/parallel/provider.py
+++ b/plugins/web/parallel/provider.py
@ -37,8 +37,10 @@ from agent.web_search_provider import WebSearchProvider
 logger = logging.getLogger(__name__)

 # Module-level client caches mirroring the legacy `tools.web_tools._parallel_client`
-# / `_async_parallel_client` pattern. Per-process singletons so we don't
-# pay SDK construction cost per call.
+# / `_async_parallel_client` pattern. For tests, the canonical cache lives on
+# tools.web_tools so existing setup_method() handlers that reset
+# ``tools.web_tools._parallel_client = None`` keep working — we read/write
+# the cache via that module rather than these module-level globals.
 _parallel_client: Any = None
 _async_parallel_client: Any = None

@ -62,41 +64,56 @@ def _ensure_parallel_sdk_installed() -> None:


 def _get_sync_client() -> Any:
-    """Lazy-load + cache the sync Parallel client."""
-    global _parallel_client
-    if _parallel_client is not None:
-        return _parallel_client
+    """Lazy-load + cache the sync Parallel client.
+
+    Cache lives on :mod:`tools.web_tools` (as ``_parallel_client``) so unit
+    tests that reset that name between cases keep working.
+    """
+    import tools.web_tools as _wt
+
+    cached = getattr(_wt, "_parallel_client", None)
+    if cached is not None:
+        return cached
+
+    api_key = os.getenv("PARALLEL_API_KEY")
+    if not api_key:
+        raise ValueError(
+            "PARALLEL_API_KEY environment variable not set. "
+            "Get your API key at https://parallel.ai"
+        )

    _ensure_parallel_sdk_installed()
    from parallel import Parallel  # noqa: WPS433 — deliberately lazy

+    client = Parallel(api_key=api_key)
+    _wt._parallel_client = client
+    return client
+
+
+def _get_async_client() -> Any:
+    """Lazy-load + cache the async Parallel client.
+
+    Cache lives on :mod:`tools.web_tools` (as ``_async_parallel_client``).
+    """
+    import tools.web_tools as _wt
+
+    cached = getattr(_wt, "_async_parallel_client", None)
+    if cached is not None:
+        return cached
+
    api_key = os.getenv("PARALLEL_API_KEY")
    if not api_key:
        raise ValueError(
            "PARALLEL_API_KEY environment variable not set. "
            "Get your API key at https://parallel.ai"
        )
-    _parallel_client = Parallel(api_key=api_key)
-    return _parallel_client
-
-
-def _get_async_client() -> Any:
-    """Lazy-load + cache the async Parallel client."""
-    global _async_parallel_client
-    if _async_parallel_client is not None:
-        return _async_parallel_client

    _ensure_parallel_sdk_installed()
    from parallel import AsyncParallel  # noqa: WPS433 — deliberately lazy

-    api_key = os.getenv("PARALLEL_API_KEY")
-    if not api_key:
-        raise ValueError(
-            "PARALLEL_API_KEY environment variable not set. "
-            "Get your API key at https://parallel.ai"
-        )
-    _async_parallel_client = AsyncParallel(api_key=api_key)
-    return _async_parallel_client
+    client = AsyncParallel(api_key=api_key)
+    _wt._async_parallel_client = client
+    return client


 def _reset_clients_for_tests() -> None:
@ -106,6 +123,12 @@ def _reset_clients_for_tests() -> None:
    _async_parallel_client = None


+# Backward-compatible aliases for the names that lived in tools.web_tools
+# before the migration (matches existing tests + external callers).
+_get_parallel_client = _get_sync_client
+_get_async_parallel_client = _get_async_client
+
+
 def _resolve_search_mode() -> str:
    """Return the validated PARALLEL_SEARCH_MODE value (default "agentic")."""
    mode = os.getenv("PARALLEL_SEARCH_MODE", "agentic").lower().strip()