mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
When context compaction's summary generation fails, the compressor's default path (abort_on_summary_failure=False) drops the middle window and inserts a static 'summary unavailable' marker — destroying the compacted turns. #29559 reported the field impact: a Connection error at the compaction moment dropped 124->15 messages (110 lost) for a long browser-automation task; #25585 is the same failure mode (failed summary commits a destructive compaction anyway). compress() already has an EXCEPTION to the historical drop default: auth failures (401/403) ALWAYS abort and preserve the session, because rotating into a placeholder-summary child on a broken credential strands the user. A transient network/connection error is the same situation in reverse: it WILL recover, and retrying then is strictly better than discarding context for a momentary blip. Extend the always-abort carve-out to terminal connection/network failures: - new _last_summary_network_failure flag, set in _generate_summary's terminal failure branch when _is_connection_error(e) (reached only after any main-model fallback is exhausted), reset alongside the auth flag; - compress() aborts when it's set (returns messages unchanged, _last_compress_aborted=True), independent of abort_on_summary_failure; - a network-specific operator warning (distinct from the auth + config-flag messages). Scoped to connection errors only: a generic 500/400 still takes the historical fallback-drop path (test_non_auth_failure_still_uses_fallback_path stays green). Tests: network-failure detection + abort-despite-flag-false, both mutation-checked (removing the flag-set fails detection; removing the carve-out fails the abort).
This commit is contained in:
parent
a4a74ca9e9
commit
ac822e4d36
2 changed files with 80 additions and 5 deletions
|
|
@ -890,7 +890,15 @@ class ContextCompressor(ContextEngine):
|
|||
# This is independent of the abort_on_summary_failure config flag:
|
||||
# rotating on a broken credential is never the right behavior.
|
||||
self._last_summary_auth_failure: bool = False
|
||||
# When a user-configured summary model fails and we recover by
|
||||
# Set when summary generation ultimately fails due to a transient
|
||||
# network/connection error (httpx/httpcore connection drop, premature
|
||||
# stream close, etc.) — distinct from auth failures but treated the
|
||||
# same way by compress(): ABORT and preserve the session unchanged
|
||||
# rather than destroy the middle window for a deterministic
|
||||
# "summary unavailable" marker. Retrying once the network recovers is
|
||||
# strictly better than discarding context for a transient blip
|
||||
# (#29559, #25585). Independent of abort_on_summary_failure.
|
||||
self._last_summary_network_failure: bool = False
|
||||
# retrying on the main model, record the failure so gateway /
|
||||
# CLI callers can still warn the user even though compression
|
||||
# succeeded. Silent recovery would hide the broken config.
|
||||
|
|
@ -1687,6 +1695,7 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
self._summary_model_fallen_back = False
|
||||
self._last_summary_error = None
|
||||
self._last_summary_auth_failure = False
|
||||
self._last_summary_network_failure = False
|
||||
return self._with_summary_prefix(summary)
|
||||
except Exception as e:
|
||||
# ``call_llm`` raises ``RuntimeError`` for two very different cases:
|
||||
|
|
@ -1819,6 +1828,15 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
if len(err_text) > 220:
|
||||
err_text = err_text[:217].rstrip() + "..."
|
||||
self._last_summary_error = err_text
|
||||
# A terminal connection/network failure (we reach this branch only
|
||||
# after any main-model fallback has already been tried or is
|
||||
# unavailable). Flag it so compress() ABORTS and preserves the
|
||||
# session unchanged instead of destroying the middle window for a
|
||||
# placeholder marker — retrying once the network recovers is
|
||||
# strictly better than dropping context (#29559, #25585). Mirrors
|
||||
# the auth-failure carve-out; independent of abort_on_summary_failure.
|
||||
if _is_streaming_closed:
|
||||
self._last_summary_network_failure = True
|
||||
logger.warning(
|
||||
"Failed to generate context summary: %s. "
|
||||
"Further summary attempts paused for %d seconds.",
|
||||
|
|
@ -2382,6 +2400,7 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
self._last_aux_model_failure_model = None
|
||||
self._last_compress_aborted = False
|
||||
self._last_summary_auth_failure = False
|
||||
self._last_summary_network_failure = False
|
||||
|
||||
# Manual /compress (force=True) bypasses the failure cooldown so the
|
||||
# user can retry immediately after an auto-compress abort. Without
|
||||
|
|
@ -2498,15 +2517,21 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
# surface a warning.
|
||||
# Default is False (historical behavior).
|
||||
#
|
||||
# EXCEPTION — auth failures always abort. A 401/403 from the summary
|
||||
# call means the credential or endpoint is broken (invalid/blocked
|
||||
# key, or a token pointed at the wrong inference host). Rotating into
|
||||
# EXCEPTION — auth AND transient network failures always abort. A
|
||||
# 401/403 from the summary call means the credential or endpoint is
|
||||
# broken (invalid/blocked key, or a token pointed at the wrong
|
||||
# inference host). A connection/stream-close error means the network
|
||||
# blipped at the compaction moment (#29559). In BOTH cases rotating into
|
||||
# a child session with a placeholder summary on a broken credential
|
||||
# strands the user on a degraded session for zero benefit — every
|
||||
# subsequent call fails the same way. So when the failure was an auth
|
||||
# error we abort regardless of abort_on_summary_failure, preserving
|
||||
# the conversation unchanged until the credential is fixed.
|
||||
if not summary and (self.abort_on_summary_failure or self._last_summary_auth_failure):
|
||||
if not summary and (
|
||||
self.abort_on_summary_failure
|
||||
or self._last_summary_auth_failure
|
||||
or self._last_summary_network_failure
|
||||
):
|
||||
n_skipped = compress_end - compress_start
|
||||
self._last_summary_dropped_count = 0 # nothing actually dropped
|
||||
self._last_summary_fallback_used = False
|
||||
|
|
@ -2521,6 +2546,15 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
"with /compress or start fresh with /new.",
|
||||
n_skipped,
|
||||
)
|
||||
elif self._last_summary_network_failure:
|
||||
logger.warning(
|
||||
"Summary generation failed with a network/connection "
|
||||
"error — aborting compression. %d message(s) preserved "
|
||||
"unchanged; the session was NOT rotated. This is "
|
||||
"transient: retry with /compress once connectivity "
|
||||
"recovers, or continue the conversation as-is.",
|
||||
n_skipped,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Summary generation failed — aborting compression "
|
||||
|
|
|
|||
|
|
@ -683,6 +683,47 @@ class TestAuthFailureAborts:
|
|||
assert c._last_compress_aborted is False
|
||||
assert len(result) < len(msgs) # middle window dropped
|
||||
|
||||
def test_generate_summary_flags_network_failure(self):
|
||||
"""A connection/network error on the summary call flags
|
||||
_last_summary_network_failure (#29559)."""
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True)
|
||||
with patch(
|
||||
"agent.context_compressor.call_llm",
|
||||
side_effect=ConnectionError("Connection error."),
|
||||
):
|
||||
result = c._generate_summary(self._msgs())
|
||||
assert result is None
|
||||
assert c._last_summary_network_failure is True
|
||||
assert c._last_summary_auth_failure is False
|
||||
|
||||
def test_compress_aborts_on_network_failure_despite_flag_false(self):
|
||||
"""#29559/#25585: abort_on_summary_failure=False (default), but a
|
||||
transient connection error must ABORT — messages returned unchanged,
|
||||
_last_compress_aborted=True — NOT drop the middle window. Retrying once
|
||||
the network recovers beats discarding context for a transient blip."""
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(
|
||||
model="test",
|
||||
quiet_mode=True,
|
||||
protect_first_n=2,
|
||||
protect_last_n=2,
|
||||
abort_on_summary_failure=False,
|
||||
)
|
||||
msgs = self._msgs(12)
|
||||
with patch(
|
||||
"agent.context_compressor.call_llm",
|
||||
side_effect=ConnectionError("Connection error."),
|
||||
):
|
||||
result = c.compress(msgs, current_tokens=999999, force=True)
|
||||
# Session must NOT be compressed/rotated — same messages back.
|
||||
assert result == msgs
|
||||
assert len(result) == len(msgs)
|
||||
assert c._last_compress_aborted is True
|
||||
assert c._last_summary_network_failure is True
|
||||
# Did NOT fall through to the static-fallback (drop-the-middle) path.
|
||||
assert c._last_summary_fallback_used is False
|
||||
|
||||
def test_aux_model_auth_failure_recovers_on_main_no_abort(self):
|
||||
"""A 401 from a DISTINCT auxiliary summary_model retries on the main
|
||||
model; if main succeeds, the auth flag is cleared and compression is
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue