diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 9bed919503..e9f4588fa6 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -338,6 +338,8 @@ class ContextCompressor(ContextEngine): self._context_probe_persistable = False self._previous_summary = None self._last_summary_error = None + self._last_summary_dropped_count = 0 + self._last_summary_fallback_used = False self._last_compression_savings_pct = 100.0 self._ineffective_compression_count = 0 @@ -441,6 +443,11 @@ class ContextCompressor(ContextEngine): self._ineffective_compression_count: int = 0 self._summary_failure_cooldown_until: float = 0.0 self._last_summary_error: Optional[str] = None + # When summary generation fails and a static fallback is inserted, + # record how many turns were unrecoverably dropped so callers + # (gateway hygiene, /compress) can surface a visible warning. + self._last_summary_dropped_count: int = 0 + self._last_summary_fallback_used: bool = False def update_from_response(self, usage: Dict[str, Any]): """Update tracked token usage from API response.""" @@ -1196,6 +1203,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio related to this topic and be more aggressive about compressing everything else. Inspired by Claude Code's ``/compact``. """ + # Reset per-call summary failure state — callers inspect these fields + # after compress() returns to decide whether to surface a warning. + self._last_summary_dropped_count = 0 + self._last_summary_fallback_used = False n_messages = len(messages) # Only need head + 3 tail messages minimum (token budget decides the real tail size) _min_for_compress = self.protect_first_n + 3 + 1 @@ -1274,6 +1285,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio if not self.quiet_mode: logger.warning("Summary generation failed — inserting static fallback context marker") n_dropped = compress_end - compress_start + self._last_summary_dropped_count = n_dropped + self._last_summary_fallback_used = True summary = ( f"{SUMMARY_PREFIX}\n" f"Summary generation was unavailable. {n_dropped} conversation turns were " diff --git a/gateway/run.py b/gateway/run.py index ac8f763b7f..5ef376a9b0 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -4800,6 +4800,34 @@ class GatewayRunner: "compression", f"{_new_tokens:,}", ) + + # If summary generation failed, the + # compressor inserted a static fallback + # placeholder and the dropped turns are + # gone for good. Surface a visible + # warning to the gateway user — agent.log + # alone is invisible on TG/Discord/etc. + _comp = getattr(_hyg_agent, "context_compressor", None) + if _comp is not None and getattr(_comp, "_last_summary_fallback_used", False): + _dropped = getattr(_comp, "_last_summary_dropped_count", 0) + _err = getattr(_comp, "_last_summary_error", None) or "unknown error" + _warn_msg = ( + "⚠️ Context compression summary failed " + f"({_err}). {_dropped} historical message(s) " + "were removed and replaced with a placeholder. " + "Earlier context is no longer recoverable. " + "Consider /reset for a clean session, or check " + "your auxiliary.compression model configuration." + ) + try: + _adapter = self.adapters.get(source.platform) + if _adapter and source.chat_id: + await _adapter.send(source.chat_id, _warn_msg) + except Exception as _werr: + logger.warning( + "Failed to deliver compression-failure warning to user: %s", + _werr, + ) finally: self._cleanup_agent_resources(_hyg_agent) @@ -7343,6 +7371,12 @@ class GatewayRunner: approx_tokens, new_tokens, ) + # Detect summary-generation failure so we can surface a + # visible warning to the user even on the manual /compress + # path (otherwise the failure is silently logged). + _summary_failed = bool(getattr(compressor, "_last_summary_fallback_used", False)) + _dropped_count = int(getattr(compressor, "_last_summary_dropped_count", 0) or 0) + _summary_err = getattr(compressor, "_last_summary_error", None) finally: self._cleanup_agent_resources(tmp_agent) lines = [f"🗜️ {summary['headline']}"] @@ -7351,6 +7385,13 @@ class GatewayRunner: lines.append(summary["token_line"]) if summary["note"]: lines.append(summary["note"]) + if _summary_failed: + lines.append( + f"⚠️ Summary generation failed ({_summary_err or 'unknown error'}). " + f"{_dropped_count} historical message(s) were removed and replaced " + "with a placeholder; earlier context is no longer recoverable. " + "Consider checking your auxiliary.compression model configuration." + ) return "\n".join(lines) except Exception as e: logger.warning("Manual compress failed: %s", e) diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 883745d6c8..582b517867 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -242,6 +242,72 @@ class TestSummaryFailureCooldown: assert mock_call.call_count == 1 +class TestSummaryFailureTrackingForGatewayWarning: + """When summary generation fails, the compressor must record dropped count + + fallback flag so gateway hygiene & /compress can surface a visible + warning instead of silently dropping context.""" + + def test_compress_records_fallback_and_dropped_count_on_summary_failure(self): + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2) + + msgs = [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "msg 1"}, + {"role": "assistant", "content": "msg 2"}, + {"role": "user", "content": "msg 3"}, + {"role": "assistant", "content": "msg 4"}, + {"role": "user", "content": "msg 5"}, + {"role": "assistant", "content": "msg 6"}, + {"role": "user", "content": "msg 7"}, + ] + + # Simulate summary LLM call failing — covers the 404 / model-not-found + # case from issue (auxiliary compression model misconfigured). + with patch("agent.context_compressor.call_llm", side_effect=Exception("404 model not found")): + result = c.compress(msgs) + + assert c._last_summary_fallback_used is True + assert c._last_summary_dropped_count > 0 + assert c._last_summary_error is not None + # Result must still be well-formed (fallback summary present). + assert any( + isinstance(m.get("content"), str) and "Summary generation was unavailable" in m["content"] + for m in result + ) + + def test_compress_clears_fallback_flag_on_subsequent_success(self): + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "summary text" + + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2) + + msgs = [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "msg 1"}, + {"role": "assistant", "content": "msg 2"}, + {"role": "user", "content": "msg 3"}, + {"role": "assistant", "content": "msg 4"}, + {"role": "user", "content": "msg 5"}, + {"role": "assistant", "content": "msg 6"}, + {"role": "user", "content": "msg 7"}, + ] + + # First call fails, second succeeds — flag must reset on second compress. + with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")): + c.compress(msgs) + assert c._last_summary_fallback_used is True + + # Reset cooldown to allow retry on second compress + c._summary_failure_cooldown_until = 0.0 + with patch("agent.context_compressor.call_llm", return_value=mock_response): + c.compress(msgs) + assert c._last_summary_fallback_used is False + assert c._last_summary_dropped_count == 0 + + class TestSummaryPrefixNormalization: def test_legacy_prefix_is_replaced(self): summary = ContextCompressor._with_summary_prefix("[CONTEXT SUMMARY]: did work")