mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-24 05:41:40 +00:00
fix(context_compressor): treat streaming premature-close as transient error
Problem:
When a provider or proxy drops a streaming response mid-flight (httpcore raises RemoteProtocolError: "incomplete chunked read", "peer closed connection", "response ended prematurely", etc.), _generate_summary would not classify it as a transient error. Instead of retrying on the main model, it entered the generic 60-second cooldown, leaving context growing unbounded until the cooldown expired. Issue #18458.

Root cause:
_is_connection_error in auxiliary_client.py did not match httpcore's streaming premature-close error substrings. context_compressor.py's _generate_summary except block never called _is_connection_error, so those errors fell through to the 60-second generic cooldown rather than triggering the retry-on-main fallback path used for timeouts.

Fix:
1. auxiliary_client.py — extend _is_connection_error keyword list with: "incomplete chunked read", "peer closed connection", "response ended prematurely", "unexpected eof", "remoteprotocolerror", "localprotocolerror". Also guard the `from openai import ...` with try/except ImportError so the function works in environments without the openai package.
2. context_compressor.py — import _is_connection_error and call it in _generate_summary's except block as _is_streaming_closed. Include _is_streaming_closed in the fallback-to-main condition (alongside _is_model_not_found, _is_timeout, _is_json_decode) and use the shorter 30s transient cooldown for streaming-closed errors.

Tests:
4 new regression tests in TestStreamingClosedFallback:
- test_incomplete_chunked_read_falls_back_to_main
- test_peer_closed_connection_falls_back_to_main
- test_streaming_closed_on_main_uses_short_cooldown (stash-verified)
- test_non_streaming_unknown_error_still_uses_long_cooldown

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
0c5c4d1b8d
commit
35f773c459
3 changed files with 157 additions and 10 deletions
|
|
@ -1840,10 +1840,12 @@ def _is_connection_error(exc: Exception) -> bool:
|
|||
distinct from API errors (4xx/5xx) which indicate the provider IS
|
||||
reachable but returned an error.
|
||||
"""
|
||||
from openai import APIConnectionError, APITimeoutError
|
||||
|
||||
if isinstance(exc, (APIConnectionError, APITimeoutError)):
|
||||
return True
|
||||
try:
|
||||
from openai import APIConnectionError, APITimeoutError
|
||||
if isinstance(exc, (APIConnectionError, APITimeoutError)):
|
||||
return True
|
||||
except ImportError:
|
||||
pass
|
||||
# urllib3 / httpx / httpcore connection errors
|
||||
err_type = type(exc).__name__
|
||||
if any(kw in err_type for kw in ("Connection", "Timeout", "DNS", "SSL")):
|
||||
|
|
@ -1853,6 +1855,16 @@ def _is_connection_error(exc: Exception) -> bool:
|
|||
"connection refused", "name or service not known",
|
||||
"no route to host", "network is unreachable",
|
||||
"timed out", "connection reset",
|
||||
# httpcore / httpx streaming premature-close errors. These surface
|
||||
# when a proxy or provider drops the connection mid-stream and are
|
||||
# transient by nature — the request should be retried or rerouted.
|
||||
# See issue #18458.
|
||||
"incomplete chunked read",
|
||||
"peer closed connection",
|
||||
"response ended prematurely",
|
||||
"unexpected eof",
|
||||
"remoteprotocolerror",
|
||||
"localprotocolerror",
|
||||
)):
|
||||
return True
|
||||
return False
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue