mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-21 10:22:18 +00:00
fix(compression): abort on auth failure instead of rotating into a degraded session
When the auxiliary summary call fails with an authentication/permission error (HTTP 401/403), context compression now ABORTS and preserves the session unchanged instead of rotating into a child session with a placeholder summary. Before: a 401 (invalid/blocked key, or a token pointed at the wrong inference host) fell through every transient-error check to 'return None', and because compression.abort_on_summary_failure defaults False, compress() took the static-fallback path and rotated the session anyway (messages N->N). The user landed on a fresh-but-broken session that kept failing the same way — paying for a full-context API call each turn with no useful compression. After: _generate_summary classifies 401/403 as a non-recoverable auth failure (_last_summary_auth_failure) and compress() aborts on it regardless of abort_on_summary_failure. A distinct auxiliary summary_model that 401s still retries once on the main model first (its dedicated creds may be the only broken thing); the abort only sticks when the main model itself auth-fails or the fallback also auth-fails. The existing _last_compress_aborted handling in conversation_compression.py already skips rotation and emits a warning, so no session rotation occurs. Tests: TestAuthFailureAborts — 401/403 flagging, compress() aborts despite flag=False, non-auth failures keep the historical fallback path, and aux-model auth failure recovers on main without aborting.
This commit is contained in:
parent
f22dd8a75a
commit
5a53e0f0f4
2 changed files with 161 additions and 8 deletions
|
|
@ -761,6 +761,14 @@ class ContextCompressor(ContextEngine):
|
|||
# this flag to know "compression was attempted but aborted, freeze
|
||||
# the chat until the user manually retries via /compress".
|
||||
self._last_compress_aborted: bool = False
|
||||
# Set True when the summary call failed with an authentication /
|
||||
# permission error (HTTP 401/403). Auth failures are non-recoverable
|
||||
# at the request level — the credential or endpoint is broken — so
|
||||
# compress() must ABORT (preserve the session unchanged) rather than
|
||||
# rotate into a degraded child session with a placeholder summary.
|
||||
# This is independent of the abort_on_summary_failure config flag:
|
||||
# rotating on a broken credential is never the right behavior.
|
||||
self._last_summary_auth_failure: bool = False
|
||||
# When a user-configured summary model fails and we recover by
|
||||
# retrying on the main model, record the failure so gateway /
|
||||
# CLI callers can still warn the user even though compression
|
||||
|
|
@ -1524,6 +1532,7 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
self._summary_failure_cooldown_until = 0.0
|
||||
self._summary_model_fallen_back = False
|
||||
self._last_summary_error = None
|
||||
self._last_summary_auth_failure = False
|
||||
return self._with_summary_prefix(summary)
|
||||
except RuntimeError:
|
||||
# No provider configured — long cooldown, unlikely to self-resolve
|
||||
|
|
@ -1571,6 +1580,26 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
# back to the main model instead of entering a 60-second cooldown.
|
||||
# See issue #18458.
|
||||
_is_streaming_closed = _is_connection_error(e)
|
||||
# Authentication / permission failures (401/403) are NOT transient
|
||||
# and NOT fixable by retrying the same request: the credential is
|
||||
# invalid/blocked/expired or the endpoint is wrong (e.g. a prod
|
||||
# token sent to a staging inference URL). Flag them so compress()
|
||||
# aborts and preserves the session instead of rotating into a
|
||||
# degraded child with a placeholder summary. We still allow the
|
||||
# one-shot fallback to the MAIN model below when the failure came
|
||||
# from a distinct auxiliary summary_model (its dedicated creds may
|
||||
# be the only broken thing); only a failure on the main model — or
|
||||
# a fallback that also auth-fails — makes the abort stick.
|
||||
_is_auth_error = (
|
||||
_status in {401, 403}
|
||||
or "invalid api key" in _err_str
|
||||
or "invalid x-api-key" in _err_str
|
||||
or ("api key" in _err_str and ("invalid" in _err_str or "blocked" in _err_str))
|
||||
or "unauthorized" in _err_str
|
||||
or "authentication" in _err_str
|
||||
)
|
||||
if _is_auth_error:
|
||||
self._last_summary_auth_failure = True
|
||||
if _is_json_decode and not _is_model_not_found and not _is_timeout:
|
||||
logger.error(
|
||||
"Context compression failed: auxiliary LLM returned a "
|
||||
|
|
@ -2178,6 +2207,7 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
self._last_aux_model_failure_error = None
|
||||
self._last_aux_model_failure_model = None
|
||||
self._last_compress_aborted = False
|
||||
self._last_summary_auth_failure = False
|
||||
|
||||
# Manual /compress (force=True) bypasses the failure cooldown so the
|
||||
# user can retry immediately after an auto-compress abort. Without
|
||||
|
|
@ -2293,19 +2323,38 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
# _last_summary_dropped_count for gateway hygiene to
|
||||
# surface a warning.
|
||||
# Default is False (historical behavior).
|
||||
if not summary and self.abort_on_summary_failure:
|
||||
#
|
||||
# EXCEPTION — auth failures always abort. A 401/403 from the summary
|
||||
# call means the credential or endpoint is broken (invalid/blocked
|
||||
# key, or a token pointed at the wrong inference host). Rotating into
|
||||
# a child session with a placeholder summary on a broken credential
|
||||
# strands the user on a degraded session for zero benefit — every
|
||||
# subsequent call fails the same way. So when the failure was an auth
|
||||
# error we abort regardless of abort_on_summary_failure, preserving
|
||||
# the conversation unchanged until the credential is fixed.
|
||||
if not summary and (self.abort_on_summary_failure or self._last_summary_auth_failure):
|
||||
n_skipped = compress_end - compress_start
|
||||
self._last_summary_dropped_count = 0 # nothing actually dropped
|
||||
self._last_summary_fallback_used = False
|
||||
self._last_compress_aborted = True
|
||||
if not self.quiet_mode:
|
||||
logger.warning(
|
||||
"Summary generation failed — aborting compression "
|
||||
"(compression.abort_on_summary_failure=true). "
|
||||
"%d message(s) preserved unchanged. Conversation is "
|
||||
"frozen until the next /compress or /new.",
|
||||
n_skipped,
|
||||
)
|
||||
if self._last_summary_auth_failure:
|
||||
logger.warning(
|
||||
"Summary generation failed with an authentication "
|
||||
"error — aborting compression. %d message(s) preserved "
|
||||
"unchanged; the session was NOT rotated. Check your "
|
||||
"provider credential / inference endpoint, then retry "
|
||||
"with /compress or start fresh with /new.",
|
||||
n_skipped,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Summary generation failed — aborting compression "
|
||||
"(compression.abort_on_summary_failure=true). "
|
||||
"%d message(s) preserved unchanged. Conversation is "
|
||||
"frozen until the next /compress or /new.",
|
||||
n_skipped,
|
||||
)
|
||||
return messages
|
||||
|
||||
# Phase 4: Assemble compressed message list
|
||||
|
|
|
|||
|
|
@ -365,6 +365,110 @@ class TestSummaryFailureCooldown:
|
|||
assert mock_call.call_count == 1
|
||||
|
||||
|
||||
class TestAuthFailureAborts:
|
||||
"""A 401/403 on the summary call must ABORT compression (preserve the
|
||||
session unchanged) instead of rotating into a degraded child session
|
||||
with a placeholder summary — regardless of abort_on_summary_failure.
|
||||
|
||||
Real incident: a nous token pointed at a stale staging inference URL
|
||||
401'd on every compression attempt, and because abort_on_summary_failure
|
||||
defaults False the session rotated anyway (messages N->N), stranding the
|
||||
user on a fresh-but-broken session that kept failing the same way.
|
||||
"""
|
||||
|
||||
def _msgs(self, n=10):
|
||||
return [
|
||||
{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
|
||||
for i in range(n)
|
||||
]
|
||||
|
||||
def _auth_err(self, status=401):
|
||||
err = Exception(
|
||||
f"Error code: {status} - "
|
||||
"{'status': 401, 'message': 'Your API key is invalid, blocked or out of funds.'}"
|
||||
)
|
||||
err.status_code = status
|
||||
return err
|
||||
|
||||
def test_generate_summary_flags_auth_failure(self):
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True)
|
||||
with patch("agent.context_compressor.call_llm", side_effect=self._auth_err(401)):
|
||||
result = c._generate_summary(self._msgs())
|
||||
assert result is None
|
||||
assert c._last_summary_auth_failure is True
|
||||
|
||||
def test_403_also_flags_auth_failure(self):
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True)
|
||||
with patch("agent.context_compressor.call_llm", side_effect=self._auth_err(403)):
|
||||
c._generate_summary(self._msgs())
|
||||
assert c._last_summary_auth_failure is True
|
||||
|
||||
def test_compress_aborts_on_auth_failure_despite_flag_false(self):
|
||||
"""abort_on_summary_failure=False (the default), but a 401 must still
|
||||
abort: messages returned unchanged, _last_compress_aborted=True."""
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(
|
||||
model="test",
|
||||
quiet_mode=True,
|
||||
protect_first_n=2,
|
||||
protect_last_n=2,
|
||||
abort_on_summary_failure=False,
|
||||
)
|
||||
msgs = self._msgs(12)
|
||||
with patch("agent.context_compressor.call_llm", side_effect=self._auth_err(401)):
|
||||
result = c.compress(msgs, current_tokens=999999, force=True)
|
||||
# Session must NOT be compressed/rotated — same messages back.
|
||||
assert result == msgs
|
||||
assert len(result) == len(msgs)
|
||||
assert c._last_compress_aborted is True
|
||||
assert c._last_summary_auth_failure is True
|
||||
# Did NOT fall through to the static-fallback (drop-the-middle) path.
|
||||
assert c._last_summary_fallback_used is False
|
||||
|
||||
def test_non_auth_failure_still_uses_fallback_path(self):
|
||||
"""A generic (non-auth) failure with abort_on_summary_failure=False
|
||||
keeps the historical behavior: insert a static fallback + drop the
|
||||
middle window (does NOT abort)."""
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(
|
||||
model="test",
|
||||
quiet_mode=True,
|
||||
protect_first_n=2,
|
||||
protect_last_n=2,
|
||||
abort_on_summary_failure=False,
|
||||
)
|
||||
msgs = self._msgs(12)
|
||||
with patch("agent.context_compressor.call_llm", side_effect=Exception("boom 500")):
|
||||
result = c.compress(msgs, current_tokens=999999, force=True)
|
||||
assert c._last_summary_auth_failure is False
|
||||
assert c._last_compress_aborted is False
|
||||
assert len(result) < len(msgs) # middle window dropped
|
||||
|
||||
def test_aux_model_auth_failure_recovers_on_main_no_abort(self):
|
||||
"""A 401 from a DISTINCT auxiliary summary_model retries on the main
|
||||
model; if main succeeds, the auth flag is cleared and compression is
|
||||
NOT aborted (the aux creds were the only broken thing)."""
|
||||
mock_ok = MagicMock()
|
||||
mock_ok.choices = [MagicMock()]
|
||||
mock_ok.choices[0].message.content = "summary via main model"
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(
|
||||
model="main-model",
|
||||
summary_model_override="broken-aux-model",
|
||||
quiet_mode=True,
|
||||
)
|
||||
with patch(
|
||||
"agent.context_compressor.call_llm",
|
||||
side_effect=[self._auth_err(401), mock_ok],
|
||||
) as mock_call:
|
||||
result = c._generate_summary(self._msgs())
|
||||
assert mock_call.call_count == 2
|
||||
assert isinstance(result, str)
|
||||
assert c._last_summary_auth_failure is False # cleared on success
|
||||
|
||||
|
||||
class TestSummaryFallbackToMainModel:
|
||||
"""When ``summary_model`` differs from the main model and the summary LLM
|
||||
call fails, the compressor should retry once on the main model before
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue