diff --git a/tests/tools/test_browser_command_timeout_race.py b/tests/tools/test_browser_command_timeout_race.py new file mode 100644 index 00000000000..812f3375fc6 --- /dev/null +++ b/tests/tools/test_browser_command_timeout_race.py @@ -0,0 +1,109 @@ +"""Regression tests for the _get_command_timeout cache race (#14331). + +Before the fix: + ``_command_timeout_resolved`` was set to ``True`` *before* + ``_cached_command_timeout`` was assigned. If the body raised between those + two statements (e.g. inside ``read_raw_config``), or a re-entrant/concurrent + reader hit the cache between them, the function returned ``None``. Callers + then evaluated ``max(None, 60)`` and crashed with:: + + TypeError: '>' not supported between instances of 'int' and 'NoneType' + +The fix: + 1. assign cache before flipping the resolved flag, + 2. flip the resolved flag *off* before nulling the cache in + ``cleanup_all_browsers()``, + 3. expose ``_safe_command_timeout()`` as defense in depth. +""" + +from unittest.mock import patch + + +class TestGetCommandTimeoutRace: + def setup_method(self): + from tools import browser_tool + + self.bt = browser_tool + self._orig_cache = browser_tool._cached_command_timeout + self._orig_resolved = browser_tool._command_timeout_resolved + browser_tool._cached_command_timeout = None + browser_tool._command_timeout_resolved = False + + def teardown_method(self): + self.bt._cached_command_timeout = self._orig_cache + self.bt._command_timeout_resolved = self._orig_resolved + + def test_returns_default_when_config_read_raises(self): + """If config reading blows up, we still return an int (not None).""" + with patch( + "hermes_cli.config.read_raw_config", side_effect=RuntimeError("boom") + ): + result = self.bt._get_command_timeout() + + assert isinstance(result, int) + assert result == self.bt.DEFAULT_COMMAND_TIMEOUT + # Cache must be populated (not left as None) once resolved is True. + assert self.bt._cached_command_timeout is not None + assert self.bt._command_timeout_resolved is True + + def test_cache_assigned_before_resolved_flag(self): + """Invariant: if resolved=True then cache must not be None.""" + with patch( + "hermes_cli.config.read_raw_config", side_effect=RuntimeError("boom") + ): + self.bt._get_command_timeout() + + # The bug was: resolved=True while cache=None. Assert that's impossible. + assert not ( + self.bt._command_timeout_resolved + and self.bt._cached_command_timeout is None + ) + + def test_safe_command_timeout_never_returns_none(self): + """Defense-in-depth helper survives a manually corrupted cache.""" + # Simulate the pre-fix bug state directly. + self.bt._command_timeout_resolved = True + self.bt._cached_command_timeout = None + + result = self.bt._safe_command_timeout() + assert isinstance(result, int) + assert result == self.bt.DEFAULT_COMMAND_TIMEOUT + + def test_safe_command_timeout_preserves_zero(self): + """``or DEFAULT_COMMAND_TIMEOUT`` would swallow a legit 0. + + We use ``is not None`` so a configured 0 stays 0. (In practice the + caller floor is 5s, but the helper itself must be honest.) + """ + self.bt._command_timeout_resolved = True + self.bt._cached_command_timeout = 0 + + assert self.bt._safe_command_timeout() == 0 + + def test_cleanup_resets_flag_before_nulling_cache(self): + """After cleanup, observers must never see resolved=True with cache=None.""" + # Warm the cache first. + with patch( + "hermes_cli.config.read_raw_config", side_effect=RuntimeError("boom") + ): + self.bt._get_command_timeout() + assert self.bt._command_timeout_resolved is True + + self.bt.cleanup_all_browsers() + + # Post-cleanup: both must be reset together; specifically resolved must + # not be True while cache is None (the original race window). + assert not ( + self.bt._command_timeout_resolved + and self.bt._cached_command_timeout is None + ) + + def test_max_call_site_pattern_never_raises(self): + """The exact expression from browser_navigate must not raise TypeError.""" + # Force the corrupted state the bug used to produce. + self.bt._command_timeout_resolved = True + self.bt._cached_command_timeout = None + + # This is the literal line from browser_navigate() after the fix. + timeout = max(self.bt._safe_command_timeout(), 60) + assert timeout == 60 diff --git a/tools/browser_tool.py b/tools/browser_tool.py index c5d0a4fe250..36853214668 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -244,10 +244,9 @@ def _get_command_timeout() -> int: cached after the first call and cleared by ``cleanup_all_browsers()``. """ global _cached_command_timeout, _command_timeout_resolved - if _command_timeout_resolved: - return _cached_command_timeout # type: ignore[return-value] + if _command_timeout_resolved and _cached_command_timeout is not None: + return _cached_command_timeout - _command_timeout_resolved = True result = DEFAULT_COMMAND_TIMEOUT try: from hermes_cli.config import read_raw_config @@ -257,10 +256,26 @@ def _get_command_timeout() -> int: result = max(int(val), 5) # Floor at 5s to avoid instant kills except Exception as e: logger.debug("Could not read command_timeout from config: %s", e) + # Assign the cached value BEFORE flipping the resolved flag so a + # concurrent reader cannot observe ``resolved=True`` while the cache + # is still ``None`` (see issue #14331). _cached_command_timeout = result + _command_timeout_resolved = True return result +def _safe_command_timeout() -> int: + """Like ``_get_command_timeout`` but guaranteed non-None. + + Defense in depth against the race fixed in ``_get_command_timeout``: + if anything ever returns ``None`` (e.g. cache reset mid-flight), fall + back to ``DEFAULT_COMMAND_TIMEOUT``. Uses ``is not None`` rather than + ``or`` so a legitimately configured ``0`` is preserved. + """ + val = _get_command_timeout() + return val if val is not None else DEFAULT_COMMAND_TIMEOUT + + def _get_open_command_timeout(*, first_open: bool = False) -> int: """Timeout for agent-browser ``open`` (navigation / daemon cold start).""" base = _get_command_timeout() @@ -2180,7 +2195,7 @@ def _run_browser_command( Parsed JSON response from agent-browser """ if timeout is None: - timeout = _get_command_timeout() + timeout = _safe_command_timeout() args = args or [] # Build the command @@ -4029,8 +4044,10 @@ def cleanup_all_browsers() -> None: _cached_agent_browser = None _agent_browser_resolved = False _discover_homebrew_node_dirs.cache_clear() - _cached_command_timeout = None + # Flip the resolved flag BEFORE nulling the cache so a concurrent + # reader never sees ``resolved=True`` with ``cache=None`` (#14331). _command_timeout_resolved = False + _cached_command_timeout = None _cached_chromium_installed = None global _chromium_autoinstall_attempted _chromium_autoinstall_attempted = False