diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index fbd79989209..dd3718acfb9 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -706,6 +706,16 @@ class _CodexCompletionsAdapter: close() except Exception: logger.debug("Codex auxiliary: client close during timeout failed", exc_info=True) + # The cached auxiliary client wraps this same ``self._client`` + # (or *is* a ``CodexAuxiliaryClient`` whose ``_real_client`` is + # this instance). After we close the httpx transport above, the + # cache must drop that entry — otherwise the next auxiliary call + # (compression retry, memory flush, etc.) reuses the dead client + # and fails fast with a connection error. See issue #23432. + try: + _evict_cached_client_instance(self._client) + except Exception: + logger.debug("Codex auxiliary: cache eviction on timeout failed", exc_info=True) def _check_cancelled() -> None: if deadline is not None and time.monotonic() >= deadline: @@ -1984,6 +1994,37 @@ def _evict_cached_clients(provider: str) -> None: _client_cache.pop(key, None) +def _evict_cached_client_instance(target: Any) -> bool: + """Drop the cache entry whose stored client is *target*. + + Used when a specific cached client has been poisoned (closed httpx + transport after a timeout, broken streaming session, etc.) so the next + auxiliary call rebuilds rather than reusing the dead instance. + + Walks ``CodexAuxiliaryClient`` wrappers via their ``_real_client`` so a + timeout that closes the underlying ``OpenAI`` client also evicts the + Codex shim that exposed it. + + Returns True when at least one entry was evicted. + """ + if target is None: + return False + evicted = False + with _client_cache_lock: + for key in list(_client_cache.keys()): + entry = _client_cache.get(key) + if entry is None: + continue + cached = entry[0] + if cached is None: + continue + real = getattr(cached, "_real_client", None) + if cached is target or real is target: + del _client_cache[key] + evicted = True + return evicted + + def _pool_cache_hint( provider: str, *, @@ -4200,6 +4241,17 @@ def call_llm( base_url=str(getattr(fb_client, "base_url", "") or "")) return _validate_llm_response( fb_client.chat.completions.create(**fb_kwargs), task) + # Connection/timeout errors leave the cached client poisoned (closed + # httpx transport, half-read stream, dead async loop). Drop it from + # the cache regardless of whether we found a fallback above so the + # next auxiliary call rebuilds a fresh client instead of reusing the + # dead one. See issue #23432. + if _is_connection_error(first_err): + try: + _evict_cached_client_instance(client) + except Exception: + logger.debug("Auxiliary: cache eviction after connection error failed", + exc_info=True) raise @@ -4517,4 +4569,12 @@ async def async_call_llm( fb_kwargs["model"] = async_fb_model return _validate_llm_response( await async_fb.chat.completions.create(**fb_kwargs), task) + # Mirror the sync path: drop poisoned clients on connection/timeout + # so the next aux call rebuilds. See issue #23432. + if _is_connection_error(first_err): + try: + _evict_cached_client_instance(client) + except Exception: + logger.debug("Auxiliary (async): cache eviction after connection error failed", + exc_info=True) raise diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 5f49f74a2be..702c4c17552 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -2123,6 +2123,191 @@ class TestCodexAuxiliaryAdapterTimeout: assert time.monotonic() - started < 0.14 +# --------------------------------------------------------------------------- +# Issue #23432 — auxiliary timeout poisons cached client; later aux calls fail +# --------------------------------------------------------------------------- + +class TestAuxiliaryClientPoisonedCacheEviction: + """Connection/timeout errors must evict the cached aux client. + + Otherwise the next auxiliary call (compression retry, memory flush, + background review) reuses the closed httpx transport and fails with + ``Connection error`` even though the main provider route is healthy. + See https://github.com/NousResearch/hermes-agent/issues/23432. + """ + + def test_evict_cached_client_instance_drops_direct_match(self): + from agent.auxiliary_client import ( + _client_cache, _client_cache_lock, _evict_cached_client_instance, + ) + + target = MagicMock(name="target_client") + other = MagicMock(name="other_client") + with _client_cache_lock: + _client_cache.clear() + _client_cache[("openrouter", False, None, None, None)] = (target, "x", None) + _client_cache[("anthropic", False, None, None, None)] = (other, "y", None) + try: + assert _evict_cached_client_instance(target) is True + assert ("openrouter", False, None, None, None) not in _client_cache + assert ("anthropic", False, None, None, None) in _client_cache + finally: + with _client_cache_lock: + _client_cache.clear() + + def test_evict_cached_client_instance_walks_codex_wrapper(self): + """Closing the underlying OpenAI client must evict the Codex shim.""" + from agent.auxiliary_client import ( + _client_cache, _client_cache_lock, _evict_cached_client_instance, + CodexAuxiliaryClient, + ) + + real = SimpleNamespace(api_key="k", base_url="https://chatgpt.com/backend-api/codex", + responses=SimpleNamespace(stream=lambda **k: None), + close=lambda: None) + wrapper = CodexAuxiliaryClient(real, "gpt-5.5") + with _client_cache_lock: + _client_cache.clear() + _client_cache[("openai-codex", False, None, None, None)] = (wrapper, "gpt-5.5", None) + try: + # Eviction by the inner OpenAI client must remove the wrapper entry. + assert _evict_cached_client_instance(real) is True + assert ("openai-codex", False, None, None, None) not in _client_cache + finally: + with _client_cache_lock: + _client_cache.clear() + + def test_evict_cached_client_instance_handles_none_and_misses(self): + from agent.auxiliary_client import _evict_cached_client_instance + + assert _evict_cached_client_instance(None) is False + assert _evict_cached_client_instance(MagicMock()) is False + + def test_codex_timeout_evicts_cached_wrapper(self): + """The timeout closer evicts the cache entry that wraps the closed client.""" + from agent.auxiliary_client import ( + _client_cache, _client_cache_lock, + _CodexCompletionsAdapter, CodexAuxiliaryClient, + ) + + class SlowAliveStream: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def __iter__(self): + for _ in range(20): + time.sleep(0.01) + yield SimpleNamespace(type="response.in_progress") + + def get_final_response(self): # pragma: no cover — timeout fires first + return SimpleNamespace(output=[], usage=None) + + closed = {"flag": False} + + class FakeClient: + def __init__(self): + self.responses = SimpleNamespace(stream=lambda **k: SlowAliveStream()) + self.api_key = "k" + self.base_url = "https://chatgpt.com/backend-api/codex" + + def close(self): + closed["flag"] = True + + fake_real = FakeClient() + wrapper = CodexAuxiliaryClient(fake_real, "gpt-5.5") + cache_key = ("openai-codex", False, None, None, None) + with _client_cache_lock: + _client_cache.clear() + _client_cache[cache_key] = (wrapper, "gpt-5.5", None) + try: + adapter = _CodexCompletionsAdapter(fake_real, "gpt-5.5") + with pytest.raises(TimeoutError): + adapter.create( + messages=[{"role": "user", "content": "x"}], + timeout=0.05, + ) + assert closed["flag"] is True, "timeout closer must close inner client" + assert cache_key not in _client_cache, ( + "timeout closer must evict cache entry that wraps the closed client" + ) + finally: + with _client_cache_lock: + _client_cache.clear() + + def test_call_llm_evicts_on_connection_error_with_explicit_provider(self): + """Connection error on an explicit provider must drop the cached client. + + This is the exact reporter scenario: ``auxiliary.compression.provider: + main`` (resolves to ``openai-codex``) → no fallback chain runs (not + auto), but the cached client was poisoned by a prior timeout and must + be evicted so the next call rebuilds. + """ + from agent.auxiliary_client import _client_cache, _client_cache_lock + + poisoned = MagicMock(name="poisoned_client") + poisoned.base_url = "https://chatgpt.com/backend-api/codex" + poisoned.chat.completions.create.side_effect = ConnectionError("transport closed") + + cache_key = ("openai-codex", False, None, None, None) + with _client_cache_lock: + _client_cache.clear() + _client_cache[cache_key] = (poisoned, "gpt-5.5", None) + + try: + with patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("openai-codex", "gpt-5.5", None, None, None), + ), patch( + "agent.auxiliary_client._get_cached_client", + return_value=(poisoned, "gpt-5.5"), + ): + with pytest.raises(ConnectionError): + call_llm( + task="compression", + messages=[{"role": "user", "content": "x"}], + ) + assert cache_key not in _client_cache, ( + "connection error must evict cached client so the next call rebuilds" + ) + finally: + with _client_cache_lock: + _client_cache.clear() + + @pytest.mark.asyncio + async def test_async_call_llm_evicts_on_connection_error_with_explicit_provider(self): + from agent.auxiliary_client import _client_cache, _client_cache_lock + + poisoned = MagicMock(name="poisoned_async_client") + poisoned.base_url = "https://chatgpt.com/backend-api/codex" + poisoned.chat.completions.create = AsyncMock(side_effect=ConnectionError("transport closed")) + + cache_key = ("openai-codex", True, None, None, None) + with _client_cache_lock: + _client_cache.clear() + _client_cache[cache_key] = (poisoned, "gpt-5.5", None) + + try: + with patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("openai-codex", "gpt-5.5", None, None, None), + ), patch( + "agent.auxiliary_client._get_cached_client", + return_value=(poisoned, "gpt-5.5"), + ): + with pytest.raises(ConnectionError): + await async_call_llm( + task="compression", + messages=[{"role": "user", "content": "x"}], + ) + assert cache_key not in _client_cache + finally: + with _client_cache_lock: + _client_cache.clear() + + # --------------------------------------------------------------------------- # _build_call_kwargs — tool dedup at API boundary # ---------------------------------------------------------------------------