diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 5f9dcfa2e0d..fbde99bda5f 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -890,7 +890,15 @@ class ContextCompressor(ContextEngine): # This is independent of the abort_on_summary_failure config flag: # rotating on a broken credential is never the right behavior. self._last_summary_auth_failure: bool = False - # When a user-configured summary model fails and we recover by + # Set when summary generation ultimately fails due to a transient + # network/connection error (httpx/httpcore connection drop, premature + # stream close, etc.) — distinct from auth failures but treated the + # same way by compress(): ABORT and preserve the session unchanged + # rather than destroy the middle window for a deterministic + # "summary unavailable" marker. Retrying once the network recovers is + # strictly better than discarding context for a transient blip + # (#29559, #25585). Independent of abort_on_summary_failure. + self._last_summary_network_failure: bool = False # retrying on the main model, record the failure so gateway / # CLI callers can still warn the user even though compression # succeeded. Silent recovery would hide the broken config. @@ -1687,6 +1695,7 @@ This compaction should PRIORITISE preserving all information related to the focu self._summary_model_fallen_back = False self._last_summary_error = None self._last_summary_auth_failure = False + self._last_summary_network_failure = False return self._with_summary_prefix(summary) except Exception as e: # ``call_llm`` raises ``RuntimeError`` for two very different cases: @@ -1819,6 +1828,15 @@ This compaction should PRIORITISE preserving all information related to the focu if len(err_text) > 220: err_text = err_text[:217].rstrip() + "..." self._last_summary_error = err_text + # A terminal connection/network failure (we reach this branch only + # after any main-model fallback has already been tried or is + # unavailable). Flag it so compress() ABORTS and preserves the + # session unchanged instead of destroying the middle window for a + # placeholder marker — retrying once the network recovers is + # strictly better than dropping context (#29559, #25585). Mirrors + # the auth-failure carve-out; independent of abort_on_summary_failure. + if _is_streaming_closed: + self._last_summary_network_failure = True logger.warning( "Failed to generate context summary: %s. " "Further summary attempts paused for %d seconds.", @@ -2382,6 +2400,7 @@ This compaction should PRIORITISE preserving all information related to the focu self._last_aux_model_failure_model = None self._last_compress_aborted = False self._last_summary_auth_failure = False + self._last_summary_network_failure = False # Manual /compress (force=True) bypasses the failure cooldown so the # user can retry immediately after an auto-compress abort. Without @@ -2498,15 +2517,21 @@ This compaction should PRIORITISE preserving all information related to the focu # surface a warning. # Default is False (historical behavior). # - # EXCEPTION — auth failures always abort. A 401/403 from the summary - # call means the credential or endpoint is broken (invalid/blocked - # key, or a token pointed at the wrong inference host). Rotating into + # EXCEPTION — auth AND transient network failures always abort. A + # 401/403 from the summary call means the credential or endpoint is + # broken (invalid/blocked key, or a token pointed at the wrong + # inference host). A connection/stream-close error means the network + # blipped at the compaction moment (#29559). In BOTH cases rotating into # a child session with a placeholder summary on a broken credential # strands the user on a degraded session for zero benefit — every # subsequent call fails the same way. So when the failure was an auth # error we abort regardless of abort_on_summary_failure, preserving # the conversation unchanged until the credential is fixed. - if not summary and (self.abort_on_summary_failure or self._last_summary_auth_failure): + if not summary and ( + self.abort_on_summary_failure + or self._last_summary_auth_failure + or self._last_summary_network_failure + ): n_skipped = compress_end - compress_start self._last_summary_dropped_count = 0 # nothing actually dropped self._last_summary_fallback_used = False @@ -2521,6 +2546,15 @@ This compaction should PRIORITISE preserving all information related to the focu "with /compress or start fresh with /new.", n_skipped, ) + elif self._last_summary_network_failure: + logger.warning( + "Summary generation failed with a network/connection " + "error — aborting compression. %d message(s) preserved " + "unchanged; the session was NOT rotated. This is " + "transient: retry with /compress once connectivity " + "recovers, or continue the conversation as-is.", + n_skipped, + ) else: logger.warning( "Summary generation failed — aborting compression " diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index cdbf66469c6..84e7d1847d7 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -683,6 +683,47 @@ class TestAuthFailureAborts: assert c._last_compress_aborted is False assert len(result) < len(msgs) # middle window dropped + def test_generate_summary_flags_network_failure(self): + """A connection/network error on the summary call flags + _last_summary_network_failure (#29559).""" + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor(model="test", quiet_mode=True) + with patch( + "agent.context_compressor.call_llm", + side_effect=ConnectionError("Connection error."), + ): + result = c._generate_summary(self._msgs()) + assert result is None + assert c._last_summary_network_failure is True + assert c._last_summary_auth_failure is False + + def test_compress_aborts_on_network_failure_despite_flag_false(self): + """#29559/#25585: abort_on_summary_failure=False (default), but a + transient connection error must ABORT — messages returned unchanged, + _last_compress_aborted=True — NOT drop the middle window. Retrying once + the network recovers beats discarding context for a transient blip.""" + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor( + model="test", + quiet_mode=True, + protect_first_n=2, + protect_last_n=2, + abort_on_summary_failure=False, + ) + msgs = self._msgs(12) + with patch( + "agent.context_compressor.call_llm", + side_effect=ConnectionError("Connection error."), + ): + result = c.compress(msgs, current_tokens=999999, force=True) + # Session must NOT be compressed/rotated — same messages back. + assert result == msgs + assert len(result) == len(msgs) + assert c._last_compress_aborted is True + assert c._last_summary_network_failure is True + # Did NOT fall through to the static-fallback (drop-the-middle) path. + assert c._last_summary_fallback_used is False + def test_aux_model_auth_failure_recovers_on_main_no_abort(self): """A 401 from a DISTINCT auxiliary summary_model retries on the main model; if main succeeds, the auth flag is cleared and compression is