diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 16db1bedc30..8d1bfebd5ff 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -761,6 +761,14 @@ class ContextCompressor(ContextEngine): # this flag to know "compression was attempted but aborted, freeze # the chat until the user manually retries via /compress". self._last_compress_aborted: bool = False + # Set True when the summary call failed with an authentication / + # permission error (HTTP 401/403). Auth failures are non-recoverable + # at the request level — the credential or endpoint is broken — so + # compress() must ABORT (preserve the session unchanged) rather than + # rotate into a degraded child session with a placeholder summary. + # This is independent of the abort_on_summary_failure config flag: + # rotating on a broken credential is never the right behavior. + self._last_summary_auth_failure: bool = False # When a user-configured summary model fails and we recover by # retrying on the main model, record the failure so gateway / # CLI callers can still warn the user even though compression @@ -1524,6 +1532,7 @@ This compaction should PRIORITISE preserving all information related to the focu self._summary_failure_cooldown_until = 0.0 self._summary_model_fallen_back = False self._last_summary_error = None + self._last_summary_auth_failure = False return self._with_summary_prefix(summary) except RuntimeError: # No provider configured — long cooldown, unlikely to self-resolve @@ -1571,6 +1580,26 @@ This compaction should PRIORITISE preserving all information related to the focu # back to the main model instead of entering a 60-second cooldown. # See issue #18458. _is_streaming_closed = _is_connection_error(e) + # Authentication / permission failures (401/403) are NOT transient + # and NOT fixable by retrying the same request: the credential is + # invalid/blocked/expired or the endpoint is wrong (e.g. a prod + # token sent to a staging inference URL). Flag them so compress() + # aborts and preserves the session instead of rotating into a + # degraded child with a placeholder summary. We still allow the + # one-shot fallback to the MAIN model below when the failure came + # from a distinct auxiliary summary_model (its dedicated creds may + # be the only broken thing); only a failure on the main model — or + # a fallback that also auth-fails — makes the abort stick. + _is_auth_error = ( + _status in {401, 403} + or "invalid api key" in _err_str + or "invalid x-api-key" in _err_str + or ("api key" in _err_str and ("invalid" in _err_str or "blocked" in _err_str)) + or "unauthorized" in _err_str + or "authentication" in _err_str + ) + if _is_auth_error: + self._last_summary_auth_failure = True if _is_json_decode and not _is_model_not_found and not _is_timeout: logger.error( "Context compression failed: auxiliary LLM returned a " @@ -2178,6 +2207,7 @@ This compaction should PRIORITISE preserving all information related to the focu self._last_aux_model_failure_error = None self._last_aux_model_failure_model = None self._last_compress_aborted = False + self._last_summary_auth_failure = False # Manual /compress (force=True) bypasses the failure cooldown so the # user can retry immediately after an auto-compress abort. Without @@ -2293,19 +2323,38 @@ This compaction should PRIORITISE preserving all information related to the focu # _last_summary_dropped_count for gateway hygiene to # surface a warning. # Default is False (historical behavior). - if not summary and self.abort_on_summary_failure: + # + # EXCEPTION — auth failures always abort. A 401/403 from the summary + # call means the credential or endpoint is broken (invalid/blocked + # key, or a token pointed at the wrong inference host). Rotating into + # a child session with a placeholder summary on a broken credential + # strands the user on a degraded session for zero benefit — every + # subsequent call fails the same way. So when the failure was an auth + # error we abort regardless of abort_on_summary_failure, preserving + # the conversation unchanged until the credential is fixed. + if not summary and (self.abort_on_summary_failure or self._last_summary_auth_failure): n_skipped = compress_end - compress_start self._last_summary_dropped_count = 0 # nothing actually dropped self._last_summary_fallback_used = False self._last_compress_aborted = True if not self.quiet_mode: - logger.warning( - "Summary generation failed — aborting compression " - "(compression.abort_on_summary_failure=true). " - "%d message(s) preserved unchanged. Conversation is " - "frozen until the next /compress or /new.", - n_skipped, - ) + if self._last_summary_auth_failure: + logger.warning( + "Summary generation failed with an authentication " + "error — aborting compression. %d message(s) preserved " + "unchanged; the session was NOT rotated. Check your " + "provider credential / inference endpoint, then retry " + "with /compress or start fresh with /new.", + n_skipped, + ) + else: + logger.warning( + "Summary generation failed — aborting compression " + "(compression.abort_on_summary_failure=true). " + "%d message(s) preserved unchanged. Conversation is " + "frozen until the next /compress or /new.", + n_skipped, + ) return messages # Phase 4: Assemble compressed message list diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 7eb1e8a57b0..c1188562998 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -365,6 +365,110 @@ class TestSummaryFailureCooldown: assert mock_call.call_count == 1 +class TestAuthFailureAborts: + """A 401/403 on the summary call must ABORT compression (preserve the + session unchanged) instead of rotating into a degraded child session + with a placeholder summary — regardless of abort_on_summary_failure. + + Real incident: a nous token pointed at a stale staging inference URL + 401'd on every compression attempt, and because abort_on_summary_failure + defaults False the session rotated anyway (messages N->N), stranding the + user on a fresh-but-broken session that kept failing the same way. + """ + + def _msgs(self, n=10): + return [ + {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} + for i in range(n) + ] + + def _auth_err(self, status=401): + err = Exception( + f"Error code: {status} - " + "{'status': 401, 'message': 'Your API key is invalid, blocked or out of funds.'}" + ) + err.status_code = status + return err + + def test_generate_summary_flags_auth_failure(self): + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor(model="test", quiet_mode=True) + with patch("agent.context_compressor.call_llm", side_effect=self._auth_err(401)): + result = c._generate_summary(self._msgs()) + assert result is None + assert c._last_summary_auth_failure is True + + def test_403_also_flags_auth_failure(self): + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor(model="test", quiet_mode=True) + with patch("agent.context_compressor.call_llm", side_effect=self._auth_err(403)): + c._generate_summary(self._msgs()) + assert c._last_summary_auth_failure is True + + def test_compress_aborts_on_auth_failure_despite_flag_false(self): + """abort_on_summary_failure=False (the default), but a 401 must still + abort: messages returned unchanged, _last_compress_aborted=True.""" + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor( + model="test", + quiet_mode=True, + protect_first_n=2, + protect_last_n=2, + abort_on_summary_failure=False, + ) + msgs = self._msgs(12) + with patch("agent.context_compressor.call_llm", side_effect=self._auth_err(401)): + result = c.compress(msgs, current_tokens=999999, force=True) + # Session must NOT be compressed/rotated — same messages back. + assert result == msgs + assert len(result) == len(msgs) + assert c._last_compress_aborted is True + assert c._last_summary_auth_failure is True + # Did NOT fall through to the static-fallback (drop-the-middle) path. + assert c._last_summary_fallback_used is False + + def test_non_auth_failure_still_uses_fallback_path(self): + """A generic (non-auth) failure with abort_on_summary_failure=False + keeps the historical behavior: insert a static fallback + drop the + middle window (does NOT abort).""" + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor( + model="test", + quiet_mode=True, + protect_first_n=2, + protect_last_n=2, + abort_on_summary_failure=False, + ) + msgs = self._msgs(12) + with patch("agent.context_compressor.call_llm", side_effect=Exception("boom 500")): + result = c.compress(msgs, current_tokens=999999, force=True) + assert c._last_summary_auth_failure is False + assert c._last_compress_aborted is False + assert len(result) < len(msgs) # middle window dropped + + def test_aux_model_auth_failure_recovers_on_main_no_abort(self): + """A 401 from a DISTINCT auxiliary summary_model retries on the main + model; if main succeeds, the auth flag is cleared and compression is + NOT aborted (the aux creds were the only broken thing).""" + mock_ok = MagicMock() + mock_ok.choices = [MagicMock()] + mock_ok.choices[0].message.content = "summary via main model" + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor( + model="main-model", + summary_model_override="broken-aux-model", + quiet_mode=True, + ) + with patch( + "agent.context_compressor.call_llm", + side_effect=[self._auth_err(401), mock_ok], + ) as mock_call: + result = c._generate_summary(self._msgs()) + assert mock_call.call_count == 2 + assert isinstance(result, str) + assert c._last_summary_auth_failure is False # cleared on success + + class TestSummaryFallbackToMainModel: """When ``summary_model`` differs from the main model and the summary LLM call fails, the compressor should retry once on the main model before