fix(browser_tool): resolve race in _get_command_timeout cache returning None (#14331)

# Conflicts:
#	tools/browser_tool.py
This commit is contained in:
Sanjay Santhanam 2026-06-29 02:10:00 -07:00 committed by Teknium
parent bf0d8fed8e
commit c79e6bceae
2 changed files with 131 additions and 5 deletions

View file

@ -0,0 +1,109 @@
"""Regression tests for the _get_command_timeout cache race (#14331).
Before the fix:
``_command_timeout_resolved`` was set to ``True`` *before*
``_cached_command_timeout`` was assigned. If the body raised between those
two statements (e.g. inside ``read_raw_config``), or a re-entrant/concurrent
reader hit the cache between them, the function returned ``None``. Callers
then evaluated ``max(None, 60)`` and crashed with::
TypeError: '>' not supported between instances of 'int' and 'NoneType'
The fix:
1. assign cache before flipping the resolved flag,
2. flip the resolved flag *off* before nulling the cache in
``cleanup_all_browsers()``,
3. expose ``_safe_command_timeout()`` as defense in depth.
"""
from unittest.mock import patch
class TestGetCommandTimeoutRace:
def setup_method(self):
from tools import browser_tool
self.bt = browser_tool
self._orig_cache = browser_tool._cached_command_timeout
self._orig_resolved = browser_tool._command_timeout_resolved
browser_tool._cached_command_timeout = None
browser_tool._command_timeout_resolved = False
def teardown_method(self):
self.bt._cached_command_timeout = self._orig_cache
self.bt._command_timeout_resolved = self._orig_resolved
def test_returns_default_when_config_read_raises(self):
"""If config reading blows up, we still return an int (not None)."""
with patch(
"hermes_cli.config.read_raw_config", side_effect=RuntimeError("boom")
):
result = self.bt._get_command_timeout()
assert isinstance(result, int)
assert result == self.bt.DEFAULT_COMMAND_TIMEOUT
# Cache must be populated (not left as None) once resolved is True.
assert self.bt._cached_command_timeout is not None
assert self.bt._command_timeout_resolved is True
def test_cache_assigned_before_resolved_flag(self):
"""Invariant: if resolved=True then cache must not be None."""
with patch(
"hermes_cli.config.read_raw_config", side_effect=RuntimeError("boom")
):
self.bt._get_command_timeout()
# The bug was: resolved=True while cache=None. Assert that's impossible.
assert not (
self.bt._command_timeout_resolved
and self.bt._cached_command_timeout is None
)
def test_safe_command_timeout_never_returns_none(self):
"""Defense-in-depth helper survives a manually corrupted cache."""
# Simulate the pre-fix bug state directly.
self.bt._command_timeout_resolved = True
self.bt._cached_command_timeout = None
result = self.bt._safe_command_timeout()
assert isinstance(result, int)
assert result == self.bt.DEFAULT_COMMAND_TIMEOUT
def test_safe_command_timeout_preserves_zero(self):
"""``or DEFAULT_COMMAND_TIMEOUT`` would swallow a legit 0.
We use ``is not None`` so a configured 0 stays 0. (In practice the
caller floor is 5s, but the helper itself must be honest.)
"""
self.bt._command_timeout_resolved = True
self.bt._cached_command_timeout = 0
assert self.bt._safe_command_timeout() == 0
def test_cleanup_resets_flag_before_nulling_cache(self):
"""After cleanup, observers must never see resolved=True with cache=None."""
# Warm the cache first.
with patch(
"hermes_cli.config.read_raw_config", side_effect=RuntimeError("boom")
):
self.bt._get_command_timeout()
assert self.bt._command_timeout_resolved is True
self.bt.cleanup_all_browsers()
# Post-cleanup: both must be reset together; specifically resolved must
# not be True while cache is None (the original race window).
assert not (
self.bt._command_timeout_resolved
and self.bt._cached_command_timeout is None
)
def test_max_call_site_pattern_never_raises(self):
"""The exact expression from browser_navigate must not raise TypeError."""
# Force the corrupted state the bug used to produce.
self.bt._command_timeout_resolved = True
self.bt._cached_command_timeout = None
# This is the literal line from browser_navigate() after the fix.
timeout = max(self.bt._safe_command_timeout(), 60)
assert timeout == 60

View file

@ -244,10 +244,9 @@ def _get_command_timeout() -> int:
cached after the first call and cleared by ``cleanup_all_browsers()``.
"""
global _cached_command_timeout, _command_timeout_resolved
if _command_timeout_resolved:
return _cached_command_timeout # type: ignore[return-value]
if _command_timeout_resolved and _cached_command_timeout is not None:
return _cached_command_timeout
_command_timeout_resolved = True
result = DEFAULT_COMMAND_TIMEOUT
try:
from hermes_cli.config import read_raw_config
@ -257,10 +256,26 @@ def _get_command_timeout() -> int:
result = max(int(val), 5) # Floor at 5s to avoid instant kills
except Exception as e:
logger.debug("Could not read command_timeout from config: %s", e)
# Assign the cached value BEFORE flipping the resolved flag so a
# concurrent reader cannot observe ``resolved=True`` while the cache
# is still ``None`` (see issue #14331).
_cached_command_timeout = result
_command_timeout_resolved = True
return result
def _safe_command_timeout() -> int:
"""Like ``_get_command_timeout`` but guaranteed non-None.
Defense in depth against the race fixed in ``_get_command_timeout``:
if anything ever returns ``None`` (e.g. cache reset mid-flight), fall
back to ``DEFAULT_COMMAND_TIMEOUT``. Uses ``is not None`` rather than
``or`` so a legitimately configured ``0`` is preserved.
"""
val = _get_command_timeout()
return val if val is not None else DEFAULT_COMMAND_TIMEOUT
def _get_open_command_timeout(*, first_open: bool = False) -> int:
"""Timeout for agent-browser ``open`` (navigation / daemon cold start)."""
base = _get_command_timeout()
@ -2180,7 +2195,7 @@ def _run_browser_command(
Parsed JSON response from agent-browser
"""
if timeout is None:
timeout = _get_command_timeout()
timeout = _safe_command_timeout()
args = args or []
# Build the command
@ -4029,8 +4044,10 @@ def cleanup_all_browsers() -> None:
_cached_agent_browser = None
_agent_browser_resolved = False
_discover_homebrew_node_dirs.cache_clear()
_cached_command_timeout = None
# Flip the resolved flag BEFORE nulling the cache so a concurrent
# reader never sees ``resolved=True`` with ``cache=None`` (#14331).
_command_timeout_resolved = False
_cached_command_timeout = None
_cached_chromium_installed = None
global _chromium_autoinstall_attempted
_chromium_autoinstall_attempted = False