From 3170c8d4484e8fcc12d8a18acdb4f246071b4249 Mon Sep 17 00:00:00 2001 From: Wesley Simplicio Date: Sat, 9 May 2026 08:51:52 -0300 Subject: [PATCH] fix(browser_tool): do not cache transient None cloud provider resolution Problem: `_get_cloud_provider()` set `_cloud_provider_resolved = True` before resolution. If credentials were briefly unavailable on the first call (e.g. a managed Nous Portal token mid-refresh), the resolver pinned the entire process to local mode forever, even after credentials self-healed seconds later. Root cause: bookkeeping was set up-front, so any code path that fell through to `return _cached_cloud_provider` (config read failure, no credentials yet, explicit-provider instantiation failure) committed the transient `None` to the cache permanently. Fix: invert the bookkeeping. `_cloud_provider_resolved = True` is now set only when (a) the user explicitly chose `cloud_provider: local`, or (b) a provider was successfully resolved. All transient `None` paths return without poisoning the cache, so the next call retries. Explicit provider instantiation failures now log at warning level with stack trace so operators can diagnose them. Tests: 5 new cases in tests/tools/test_browser_cloud_provider_cache.py covering explicit local, successful resolution, no-credentials-yet, config read failure, and explicit provider instantiation failure. Stash-verify confirmed the 3 transient-None tests fail without the fix. All 320 existing browser tests still green. Closes #22324 --- .../test_browser_cloud_provider_cache.py | 125 ++++++++++++++++++ tools/browser_tool.py | 26 +++- 2 files changed, 146 insertions(+), 5 deletions(-) create mode 100644 tests/tools/test_browser_cloud_provider_cache.py diff --git a/tests/tools/test_browser_cloud_provider_cache.py b/tests/tools/test_browser_cloud_provider_cache.py new file mode 100644 index 00000000000..d0ea09eab5e --- /dev/null +++ b/tests/tools/test_browser_cloud_provider_cache.py @@ -0,0 +1,125 @@ +"""Tests for ``_get_cloud_provider()`` caching policy. + +Regression coverage for issue #22324: a transient ``None`` from the resolver +must not be cached for the lifetime of the process. Cache only when: + +* The user explicitly opts in to ``cloud_provider: local``, OR +* A provider is successfully resolved. + +All other ``None`` outcomes (no credentials yet, config read error, explicit +provider instantiation failure) leave the cache unset so the next call retries. +""" +import logging +from unittest.mock import Mock, patch + +import pytest + +import tools.browser_tool as browser_tool + + +@pytest.fixture(autouse=True) +def _reset_resolver_state(monkeypatch): + monkeypatch.setattr(browser_tool, "_cached_cloud_provider", None) + monkeypatch.setattr(browser_tool, "_cloud_provider_resolved", False) + yield + + +class TestCloudProviderCachePolicy: + def test_explicit_local_caches_permanently(self, monkeypatch): + """`cloud_provider: local` is a positive choice and must stick.""" + monkeypatch.setattr( + "hermes_cli.config.read_raw_config", + lambda: {"browser": {"cloud_provider": "local"}}, + ) + + assert browser_tool._get_cloud_provider() is None + assert browser_tool._cloud_provider_resolved is True + + # Even if config later changes, the cache stays. + monkeypatch.setattr( + "hermes_cli.config.read_raw_config", + lambda: {"browser": {"cloud_provider": "browser-use"}}, + ) + assert browser_tool._get_cloud_provider() is None + + def test_successful_cloud_resolution_caches_permanently(self, monkeypatch): + """A real provider instance must be cached and reused.""" + fake_provider = Mock(name="BrowserUseProvider-instance") + factory = Mock(return_value=fake_provider) + monkeypatch.setattr( + browser_tool, "_PROVIDER_REGISTRY", {"browser-use": factory} + ) + monkeypatch.setattr( + "hermes_cli.config.read_raw_config", + lambda: {"browser": {"cloud_provider": "browser-use"}}, + ) + + assert browser_tool._get_cloud_provider() is fake_provider + assert browser_tool._cloud_provider_resolved is True + + # Subsequent calls hit the cache; factory not called again. + assert browser_tool._get_cloud_provider() is fake_provider + assert factory.call_count == 1 + + def test_no_credentials_yet_does_not_cache_none(self, monkeypatch): + """Auto-detect path with no creds: must NOT poison the cache.""" + monkeypatch.setattr( + "hermes_cli.config.read_raw_config", + lambda: {"browser": {}}, + ) + + bu_unconfigured = Mock() + bu_unconfigured.is_configured.return_value = False + bb_unconfigured = Mock() + bb_unconfigured.is_configured.return_value = False + monkeypatch.setattr( + browser_tool, "BrowserUseProvider", lambda: bu_unconfigured + ) + monkeypatch.setattr( + browser_tool, "BrowserbaseProvider", lambda: bb_unconfigured + ) + + assert browser_tool._get_cloud_provider() is None + assert browser_tool._cloud_provider_resolved is False + + # Credentials self-heal — next call must retry and pick up the provider. + healed = Mock(name="healed-provider") + healed.is_configured.return_value = True + monkeypatch.setattr(browser_tool, "BrowserUseProvider", lambda: healed) + + assert browser_tool._get_cloud_provider() is healed + assert browser_tool._cloud_provider_resolved is True + + def test_config_read_failure_does_not_cache_none(self, monkeypatch): + """A raised config read must not pin the resolver to local mode.""" + def boom(): + raise OSError("config file locked") + + monkeypatch.setattr("hermes_cli.config.read_raw_config", boom) + + assert browser_tool._get_cloud_provider() is None + assert browser_tool._cloud_provider_resolved is False + + def test_explicit_provider_instantiation_failure_does_not_cache( + self, monkeypatch, caplog + ): + """If `_PROVIDER_REGISTRY[key]()` raises, log warning and don't cache.""" + def exploding_factory(): + raise RuntimeError("missing dependency") + + monkeypatch.setattr( + browser_tool, "_PROVIDER_REGISTRY", {"browser-use": exploding_factory} + ) + monkeypatch.setattr( + "hermes_cli.config.read_raw_config", + lambda: {"browser": {"cloud_provider": "browser-use"}}, + ) + + with caplog.at_level(logging.WARNING, logger="tools.browser_tool"): + assert browser_tool._get_cloud_provider() is None + + assert browser_tool._cloud_provider_resolved is False + assert any( + "browser-use" in r.message and r.levelno == logging.WARNING + for r in caplog.records + ) diff --git a/tools/browser_tool.py b/tools/browser_tool.py index ee642db8bd1..0a48fc8977a 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -422,7 +422,7 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]: if _cloud_provider_resolved: return _cached_cloud_provider - _cloud_provider_resolved = True + resolved: Optional[CloudBrowserProvider] = None try: from hermes_cli.config import read_raw_config cfg = read_raw_config() @@ -434,23 +434,39 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]: ) if provider_key == "local": _cached_cloud_provider = None + _cloud_provider_resolved = True return None if provider_key and provider_key in _PROVIDER_REGISTRY: - _cached_cloud_provider = _PROVIDER_REGISTRY[provider_key]() + try: + resolved = _PROVIDER_REGISTRY[provider_key]() + except Exception: + logger.warning( + "Failed to instantiate explicit cloud_provider %r; will retry on next call", + provider_key, + exc_info=True, + ) + return None except Exception as e: logger.debug("Could not read cloud_provider from config: %s", e) + return None - if _cached_cloud_provider is None: + if resolved is None: # Prefer Browser Use (managed Nous gateway or direct API key), # fall back to Browserbase (direct credentials only). fallback_provider = BrowserUseProvider() if fallback_provider.is_configured(): - _cached_cloud_provider = fallback_provider + resolved = fallback_provider else: fallback_provider = BrowserbaseProvider() if fallback_provider.is_configured(): - _cached_cloud_provider = fallback_provider + resolved = fallback_provider + if resolved is None: + # Transient None — credentials may self-heal. Don't poison the cache. + return None + + _cached_cloud_provider = resolved + _cloud_provider_resolved = True return _cached_cloud_provider