mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(auxiliary): evict cached client on timeout/connection error (#23482)
A Codex auxiliary timeout closes the underlying OpenAI client (so the streaming hang doesn't sit until the user kills the session), but the cached wrapper kept pointing at the now-dead transport. Subsequent auxiliary calls (compression retry, memory flush, background review, title generation routed via `provider: main`) reused that closed client and failed fast with 'Connection error' until the gateway restarted — even though the main agent route was healthy the whole time.

Sync `_get_cached_client` had no liveness check (async did, via loop identity), and the connection-error fallback in `call_llm` only fired on the auto provider path, so an explicit provider — including the common `auxiliary.compression.provider: main` shape — never evicted.

Three fixes:

* New `_evict_cached_client_instance(target)` helper that drops the cache entry whose stored client is `target` (or wraps it via `_real_client`, for `CodexAuxiliaryClient`).
* `_CodexCompletionsAdapter._close_client_on_timeout` evicts the wrapper after closing the inner OpenAI client.
* `call_llm` and `async_call_llm` evict on `_is_connection_error` before re-raising, regardless of whether the provider is auto.

Net effect: one timeout costs one summary attempt + the existing 30s compressor cooldown; the next compaction rebuilds the client and works. Non-connection errors (4xx/5xx) do not evict, so cache hits stay stable.

Closes #23432
This commit is contained in:
parent
ae83a54be4
commit
e5bce320db
2 changed files with 245 additions and 0 deletions
|
|
@ -706,6 +706,16 @@ class _CodexCompletionsAdapter:
|
|||
close()
|
||||
except Exception:
|
||||
logger.debug("Codex auxiliary: client close during timeout failed", exc_info=True)
|
||||
# The cached auxiliary client wraps this same ``self._client``
|
||||
# (or *is* a ``CodexAuxiliaryClient`` whose ``_real_client`` is
|
||||
# this instance). After we close the httpx transport above, the
|
||||
# cache must drop that entry — otherwise the next auxiliary call
|
||||
# (compression retry, memory flush, etc.) reuses the dead client
|
||||
# and fails fast with a connection error. See issue #23432.
|
||||
try:
|
||||
_evict_cached_client_instance(self._client)
|
||||
except Exception:
|
||||
logger.debug("Codex auxiliary: cache eviction on timeout failed", exc_info=True)
|
||||
|
||||
def _check_cancelled() -> None:
|
||||
if deadline is not None and time.monotonic() >= deadline:
|
||||
|
|
@ -1984,6 +1994,37 @@ def _evict_cached_clients(provider: str) -> None:
|
|||
_client_cache.pop(key, None)
|
||||
|
||||
|
||||
def _evict_cached_client_instance(target: Any) -> bool:
    """Remove every cache entry that holds *target*, directly or wrapped.

    Intended for the case where one particular cached client has become
    unusable (for example its httpx transport was closed after a timeout)
    and the next auxiliary call should build a fresh client instead of
    picking the dead one back up.

    An entry matches when its stored client (element 0 of the entry) is
    *target* itself, or when that stored client exposes a ``_real_client``
    attribute that is *target* -- which is how a ``CodexAuxiliaryClient``
    shim around the closed ``OpenAI`` client gets evicted as well.
    Comparison is by identity (``is``), never by equality.

    Args:
        target: The client instance to look for. ``None`` matches nothing.

    Returns:
        True if at least one cache entry was removed, otherwise False.
    """
    if target is None:
        return False

    removed = 0
    with _client_cache_lock:
        # Iterate over a snapshot of the keys: entries are deleted from
        # the live dict while we scan.
        for cache_key in tuple(_client_cache):
            slot = _client_cache.get(cache_key)
            client = slot[0] if slot is not None else None
            if client is None:
                continue
            # Look through a wrapper to the client it delegates to, if any.
            inner = getattr(client, "_real_client", None)
            if client is not target and inner is not target:
                continue
            del _client_cache[cache_key]
            removed += 1
    return removed > 0
|
||||
|
||||
|
||||
def _pool_cache_hint(
|
||||
provider: str,
|
||||
*,
|
||||
|
|
@ -4200,6 +4241,17 @@ def call_llm(
|
|||
base_url=str(getattr(fb_client, "base_url", "") or ""))
|
||||
return _validate_llm_response(
|
||||
fb_client.chat.completions.create(**fb_kwargs), task)
|
||||
# Connection/timeout errors leave the cached client poisoned (closed
|
||||
# httpx transport, half-read stream, dead async loop). Drop it from
|
||||
# the cache regardless of whether we found a fallback above so the
|
||||
# next auxiliary call rebuilds a fresh client instead of reusing the
|
||||
# dead one. See issue #23432.
|
||||
if _is_connection_error(first_err):
|
||||
try:
|
||||
_evict_cached_client_instance(client)
|
||||
except Exception:
|
||||
logger.debug("Auxiliary: cache eviction after connection error failed",
|
||||
exc_info=True)
|
||||
raise
|
||||
|
||||
|
||||
|
|
@ -4517,4 +4569,12 @@ async def async_call_llm(
|
|||
fb_kwargs["model"] = async_fb_model
|
||||
return _validate_llm_response(
|
||||
await async_fb.chat.completions.create(**fb_kwargs), task)
|
||||
# Mirror the sync path: drop poisoned clients on connection/timeout
|
||||
# so the next aux call rebuilds. See issue #23432.
|
||||
if _is_connection_error(first_err):
|
||||
try:
|
||||
_evict_cached_client_instance(client)
|
||||
except Exception:
|
||||
logger.debug("Auxiliary (async): cache eviction after connection error failed",
|
||||
exc_info=True)
|
||||
raise
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue