Merge pull request #51881 from NousResearch/fix/29559-compression-abort-on-network-failure

fix(compression): abort + preserve context on transient network summary failure (#29559, #25585)
This commit is contained in:
kshitij 2026-06-24 19:54:21 +05:30 committed by GitHub
commit 7fb2027d85
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 80 additions and 5 deletions

View file

@ -890,7 +890,15 @@ class ContextCompressor(ContextEngine):
# This is independent of the abort_on_summary_failure config flag:
# rotating on a broken credential is never the right behavior.
self._last_summary_auth_failure: bool = False
# Set when summary generation ultimately fails due to a transient
# network/connection error (httpx/httpcore connection drop, premature
# stream close, etc.) — distinct from auth failures but treated the
# same way by compress(): ABORT and preserve the session unchanged
# rather than destroy the middle window for a deterministic
# "summary unavailable" marker. Retrying once the network recovers is
# strictly better than discarding context for a transient blip
# (#29559, #25585). Independent of abort_on_summary_failure.
self._last_summary_network_failure: bool = False
# When a user-configured summary model fails and we recover by
# retrying on the main model, record the failure so gateway /
# CLI callers can still warn the user even though compression
# succeeded. Silent recovery would hide the broken config.
@ -1687,6 +1695,7 @@ This compaction should PRIORITISE preserving all information related to the focu
self._summary_model_fallen_back = False
self._last_summary_error = None
self._last_summary_auth_failure = False
self._last_summary_network_failure = False
return self._with_summary_prefix(summary)
except Exception as e:
# ``call_llm`` raises ``RuntimeError`` for two very different cases:
@ -1819,6 +1828,15 @@ This compaction should PRIORITISE preserving all information related to the focu
if len(err_text) > 220:
err_text = err_text[:217].rstrip() + "..."
self._last_summary_error = err_text
# A terminal connection/network failure (we reach this branch only
# after any main-model fallback has already been tried or is
# unavailable). Flag it so compress() ABORTS and preserves the
# session unchanged instead of destroying the middle window for a
# placeholder marker — retrying once the network recovers is
# strictly better than dropping context (#29559, #25585). Mirrors
# the auth-failure carve-out; independent of abort_on_summary_failure.
if _is_streaming_closed:
self._last_summary_network_failure = True
logger.warning(
"Failed to generate context summary: %s. "
"Further summary attempts paused for %d seconds.",
@ -2382,6 +2400,7 @@ This compaction should PRIORITISE preserving all information related to the focu
self._last_aux_model_failure_model = None
self._last_compress_aborted = False
self._last_summary_auth_failure = False
self._last_summary_network_failure = False
# Manual /compress (force=True) bypasses the failure cooldown so the
# user can retry immediately after an auto-compress abort. Without
@ -2498,15 +2517,21 @@ This compaction should PRIORITISE preserving all information related to the focu
# surface a warning.
# Default is False (historical behavior).
#
# EXCEPTION — auth failures always abort. A 401/403 from the summary
# call means the credential or endpoint is broken (invalid/blocked
# key, or a token pointed at the wrong inference host). Rotating into
# EXCEPTION — auth AND transient network failures always abort. A
# 401/403 from the summary call means the credential or endpoint is
# broken (invalid/blocked key, or a token pointed at the wrong
# inference host). A connection/stream-close error means the network
# blipped at the compaction moment (#29559). In BOTH cases rotating into
# a child session with a placeholder summary strands the user on a
# degraded session for zero benefit — a broken credential makes every
# subsequent call fail the same way, and a network blip is better
# retried once connectivity recovers. So when the failure was an auth
# or network error we abort regardless of abort_on_summary_failure,
# preserving the conversation unchanged until the problem is resolved.
if not summary and (self.abort_on_summary_failure or self._last_summary_auth_failure):
if not summary and (
self.abort_on_summary_failure
or self._last_summary_auth_failure
or self._last_summary_network_failure
):
n_skipped = compress_end - compress_start
self._last_summary_dropped_count = 0 # nothing actually dropped
self._last_summary_fallback_used = False
@ -2521,6 +2546,15 @@ This compaction should PRIORITISE preserving all information related to the focu
"with /compress or start fresh with /new.",
n_skipped,
)
elif self._last_summary_network_failure:
logger.warning(
"Summary generation failed with a network/connection "
"error — aborting compression. %d message(s) preserved "
"unchanged; the session was NOT rotated. This is "
"transient: retry with /compress once connectivity "
"recovers, or continue the conversation as-is.",
n_skipped,
)
else:
logger.warning(
"Summary generation failed — aborting compression "

View file

@ -683,6 +683,47 @@ class TestAuthFailureAborts:
assert c._last_compress_aborted is False
assert len(result) < len(msgs) # middle window dropped
def test_generate_summary_flags_network_failure(self):
"""A connection/network error on the summary call flags
_last_summary_network_failure (#29559)."""
# Patch the context-length lookup to a fixed value while the compressor
# is constructed.
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
c = ContextCompressor(model="test", quiet_mode=True)
# Make the summary LLM call raise a ConnectionError.
with patch(
"agent.context_compressor.call_llm",
side_effect=ConnectionError("Connection error."),
):
result = c._generate_summary(self._msgs())
# No summary is returned, and the failure is recorded as a network
# failure rather than an auth failure.
assert result is None
assert c._last_summary_network_failure is True
assert c._last_summary_auth_failure is False
def test_compress_aborts_on_network_failure_despite_flag_false(self):
"""#29559/#25585: abort_on_summary_failure=False (default), but a
transient connection error must ABORT — messages returned unchanged,
_last_compress_aborted=True — NOT drop the middle window. Retrying once
the network recovers beats discarding context for a transient blip."""
# Patch the context-length lookup to a fixed value while the compressor
# is constructed; abort_on_summary_failure is explicitly disabled.
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
c = ContextCompressor(
model="test",
quiet_mode=True,
protect_first_n=2,
protect_last_n=2,
abort_on_summary_failure=False,
)
msgs = self._msgs(12)
# Make the summary LLM call raise a ConnectionError during compress().
with patch(
"agent.context_compressor.call_llm",
side_effect=ConnectionError("Connection error."),
):
result = c.compress(msgs, current_tokens=999999, force=True)
# Session must NOT be compressed/rotated — same messages back.
assert result == msgs
assert len(result) == len(msgs)
assert c._last_compress_aborted is True
assert c._last_summary_network_failure is True
# Did NOT fall through to the static-fallback (drop-the-middle) path.
assert c._last_summary_fallback_used is False
def test_aux_model_auth_failure_recovers_on_main_no_abort(self):
"""A 401 from a DISTINCT auxiliary summary_model retries on the main
model; if main succeeds, the auth flag is cleared and compression is