fix(context_compressor): treat streaming premature-close as transient error

Problem: When a provider or proxy drops a streaming response mid-flight (httpcore raises RemoteProtocolError: "incomplete chunked read", "peer closed connection", "response ended prematurely", etc.), _generate_summary would not classify it as a transient error. Instead of retrying on the main model, it entered the generic 60-second cooldown, leaving context growing unbounded until the cooldown expired. Issue #18458. Root cause: _is_connection_error in auxiliary_client.py did not match httpcore's streaming premature-close error substrings. context_compressor.py's _generate_summary except block never called _is_connection_error, so those errors fell through to the 60-second generic cooldown rather than triggering the retry-on-main fallback path used for timeouts. Fix: 1. auxiliary_client.py — extend _is_connection_error keyword list with: "incomplete chunked read", "peer closed connection", "response ended prematurely", "unexpected eof", "remoteprotocolerror", "localprotocolerror". Also guard the `from openai import ...` with try/except ImportError so the function works in environments without the openai package. 2. context_compressor.py — import _is_connection_error and call it in _generate_summary's except block as _is_streaming_closed. Include _is_streaming_closed in the fallback-to-main condition (alongside _is_model_not_found, _is_timeout, _is_json_decode) and use the shorter 30s transient cooldown for streaming-closed errors. Tests: 4 new regression tests in TestStreamingClosedFallback: - test_incomplete_chunked_read_falls_back_to_main - test_peer_closed_connection_falls_back_to_main - test_streaming_closed_on_main_uses_short_cooldown (stash-verified) - test_non_streaming_unknown_error_still_uses_long_cooldown Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-18 04:41:56 +00:00 · 2026-05-09 12:33:23 -03:00 · 2026-05-09 12:33:23 -03:00 · 35f773c459
commit 35f773c459
parent 0c5c4d1b8d
3 changed files with 157 additions and 10 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -1840,10 +1840,12 @@ def _is_connection_error(exc: Exception) -> bool:
    distinct from API errors (4xx/5xx) which indicate the provider IS
    reachable but returned an error.
    """
-    from openai import APIConnectionError, APITimeoutError
-
-    if isinstance(exc, (APIConnectionError, APITimeoutError)):
-        return True
+    try:
+        from openai import APIConnectionError, APITimeoutError
+        if isinstance(exc, (APIConnectionError, APITimeoutError)):
+            return True
+    except ImportError:
+        pass
    # urllib3 / httpx / httpcore connection errors
    err_type = type(exc).__name__
    if any(kw in err_type for kw in ("Connection", "Timeout", "DNS", "SSL")):
@ -1853,6 +1855,16 @@ def _is_connection_error(exc: Exception) -> bool:
        "connection refused", "name or service not known",
        "no route to host", "network is unreachable",
        "timed out", "connection reset",
+        # httpcore / httpx streaming premature-close errors.  These surface
+        # when a proxy or provider drops the connection mid-stream and are
+        # transient by nature — the request should be retried or rerouted.
+        # See issue #18458.
+        "incomplete chunked read",
+        "peer closed connection",
+        "response ended prematurely",
+        "unexpected eof",
+        "remoteprotocolerror",
+        "localprotocolerror",
    )):
        return True
    return False
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -23,7 +23,7 @@ import re
 import time
 from typing import Any, Dict, List, Optional

-from agent.auxiliary_client import call_llm
+from agent.auxiliary_client import call_llm, _is_connection_error
 from agent.context_engine import ContextEngine
 from agent.model_metadata import (
    MINIMUM_CONTEXT_LENGTH,
@ -1000,6 +1000,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                isinstance(e, json.JSONDecodeError)
                or "expecting value" in _err_str
            )
+            # httpcore / httpx streaming premature-close errors surface as
+            # ConnectionError subclasses or plain Exception with characteristic
+            # substrings ("incomplete chunked read", "peer closed connection",
+            # "response ended prematurely", "unexpected eof").  These are
+            # transient network events; treat them like a timeout so we fall
+            # back to the main model instead of entering a 60-second cooldown.
+            # See issue #18458.
+            _is_streaming_closed = _is_connection_error(e)
            if _is_json_decode and not _is_model_not_found and not _is_timeout:
                logger.error(
                    "Context compression failed: auxiliary LLM returned a "
@ -1012,7 +1020,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                    e,
                )
            if (
-                (_is_model_not_found or _is_timeout or _is_json_decode)
+                (_is_model_not_found or _is_timeout or _is_json_decode or _is_streaming_closed)
                and self.summary_model
                and self.summary_model != self.model
                and not getattr(self, "_summary_model_fallen_back", False)
@ -1021,6 +1029,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                    _reason = "returned invalid JSON"
                elif _is_model_not_found:
                    _reason = "unavailable"
+                elif _is_streaming_closed:
+                    _reason = "closed stream prematurely"
                else:
                    _reason = "timed out"
                self._fallback_to_main_for_compression(e, _reason)
@ -1043,10 +1053,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                self._fallback_to_main_for_compression(e, "failed")
                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)

-            # Transient errors (timeout, rate limit, network, JSON decode) —
-            # shorter cooldown for JSON decode since the body shape can flip
-            # back to valid quickly when an upstream proxy recovers.
-            _transient_cooldown = 30 if _is_json_decode else 60
+            # Transient errors (timeout, rate limit, network, JSON decode,
+            # streaming premature-close) — shorter cooldown for JSON decode and
+            # streaming-closed since those conditions can self-resolve quickly.
+            _transient_cooldown = 30 if (_is_json_decode or _is_streaming_closed) else 60
            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
            err_text = str(e).strip() or e.__class__.__name__
            if len(err_text) > 220: