From 7039020c9fbe84114c67610b9f54df5af421034e Mon Sep 17 00:00:00 2001
From: Tranquil-Flow
Date: Thu, 23 Apr 2026 20:27:17 +1000
Subject: [PATCH] fix(agent): preserve messages when compression summary
 generation fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When _generate_summary() fails (timeout, rate limit, no provider, HTTP
529), compress() previously destroyed all middle conversation turns and
replaced them with a static placeholder string. This caused permanent,
unrecoverable data loss — the model lost all context of earlier work.

The fix is simple: when summary generation returns None, return the
original messages unchanged instead of proceeding with the destruction.
The conversation stays intact and compression can be retried on the next
trigger.

Fixes #10719, #12131, #11914, #11585
---
 agent/context_compressor.py            |  34 +++---
 tests/agent/test_context_compressor.py | 144 +++++++++++++++++++++++--
 2 files changed, 158 insertions(+), 20 deletions(-)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index f8036851fc..345fd81f93 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -817,8 +817,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             # No provider configured — long cooldown, unlikely to self-resolve
             self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
             logging.warning("Context compression: no provider available for "
-                            "summary. Middle turns will be dropped without summary "
-                            "for %d seconds.",
+                            "summary. Preserving original messages and pausing "
+                            "summary attempts for %d seconds.",
                             _SUMMARY_FAILURE_COOLDOWN_SECONDS)
             return None
         except Exception as e:
@@ -1135,6 +1135,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
 
         display_tokens = current_tokens if current_tokens else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
 
+        # Snapshot the original messages before any lossy modifications.
+        # _prune_old_tool_results() returns a NEW list (via [m.copy() for m in messages])
+        # so rebinding 'messages' below does not affect original_messages.
+        # If summary generation fails later, we return this exact list so
+        # the caller's conversation history is completely unmodified.
+        original_messages = messages
+
         # Phase 1: Prune old tool results (cheap, no LLM call)
         messages, pruned_count = self._prune_old_tool_results(
             messages, protect_tail_count=self.protect_last_n,
@@ -1151,7 +1158,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
         compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
 
         if compress_start >= compress_end:
-            return messages
+            return original_messages
 
         turns_to_summarize = messages[compress_start:compress_end]
 
@@ -1194,19 +1201,18 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             )
             compressed.append(msg)
 
-        # If LLM summary failed, insert a static fallback so the model
-        # knows context was lost rather than silently dropping everything.
+        # If LLM summary failed, preserve the original messages rather than
+        # destroying conversation history. Dropping turns without a summary
+        # causes permanent data loss — the model loses context of earlier work
+        # with no way to recover it. (#10719, #12131, #11914, #11585)
         if not summary:
             if not self.quiet_mode:
-                logger.warning("Summary generation failed — inserting static fallback context marker")
-            n_dropped = compress_end - compress_start
-            summary = (
-                f"{SUMMARY_PREFIX}\n"
-                f"Summary generation was unavailable. {n_dropped} conversation turns were "
-                f"removed to free context space but could not be summarized. The removed "
-                f"turns contained earlier work in this session. Continue based on the "
-                f"recent messages below and the current state of any files or resources."
-            )
+                logger.warning(
+                    "Summary generation failed — preserving original messages "
+                    "to prevent data loss (%d middle turns kept intact)",
+                    compress_end - compress_start,
+                )
+            return original_messages
 
         _merge_summary_into_tail = False
         last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user"
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 8072a58d98..41b024a9b2 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -65,9 +65,10 @@ class TestCompress:
         assert result == msgs
 
     def test_truncation_fallback_no_client(self, compressor):
-        # compressor has client=None, so should use truncation fallback
+        # With a mocked summary, compression should reduce message count
         msgs = [{"role": "system", "content": "System prompt"}] + self._make_messages(10)
-        result = compressor.compress(msgs)
+        with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
+            result = compressor.compress(msgs)
         assert len(result) < len(msgs)
         # Should keep system message and last N
         assert result[0]["role"] == "system"
@@ -75,14 +76,17 @@
     def test_compression_increments_count(self, compressor):
         msgs = self._make_messages(10)
-        compressor.compress(msgs)
+        with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
+            compressor.compress(msgs)
         assert compressor.compression_count == 1
-        compressor.compress(msgs)
+        with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary 2"):
+            compressor.compress(msgs)
         assert compressor.compression_count == 2
 
     def test_protects_first_and_last(self, compressor):
         msgs = self._make_messages(10)
-        result = compressor.compress(msgs)
+        with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
+            result = compressor.compress(msgs)
         # First 2 messages should be preserved (protect_first_n=2)
         # Last 2 messages should be preserved (protect_last_n=2)
         assert result[-1]["content"] == msgs[-1]["content"]
@@ -93,6 +97,133 @@ class TestCompress:
         assert msgs[-2]["content"] in result[-2]["content"]
 
 
+class TestPreserveOnSummaryFailure:
+    """Regression: when summary generation fails, compress() must return messages
+    unchanged instead of destroying middle turns. (#10719, #12131, #11914, #11585)"""
+
+    def test_summary_failure_preserves_all_messages(self):
+        """When _generate_summary returns None, no messages should be dropped."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+
+        with patch.object(c, "_generate_summary", return_value=None):
+            result = c.compress(msgs)
+
+        assert len(result) == len(msgs), (
+            f"Messages were dropped despite summary failure: {len(msgs)} -> {len(result)}"
+        )
+
+    def test_summary_failure_preserves_content(self):
+        """Every original message should be present after a failed compression."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+        original_contents = [m["content"] for m in msgs]
+
+        with patch.object(c, "_generate_summary", return_value=None):
+            result = c.compress(msgs)
+
+        result_contents = [m["content"] for m in result]
+        for content in original_contents:
+            assert content in result_contents, f"Message '{content}' was lost"
+
+    def test_summary_failure_does_not_increment_compression_count(self):
+        """A failed compression should not count as a successful one."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+
+        with patch.object(c, "_generate_summary", return_value=None):
+            c.compress(msgs)
+
+        assert c.compression_count == 0
+
+    def test_successful_summary_still_compresses(self):
+        """Verify the normal path still works — compression happens when summary succeeds."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+
+        with patch.object(c, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary of work"):
+            result = c.compress(msgs)
+
+        assert len(result) < len(msgs)
+        assert c.compression_count == 1
+
+    def test_transient_llm_error_preserves_messages(self):
+        """Simulate a real transient error (timeout/rate limit) through the full
+        _generate_summary path — messages should be preserved."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+
+        with patch("agent.context_compressor.call_llm", side_effect=Exception("Connection timeout")):
+            result = c.compress(msgs)
+
+        assert len(result) == len(msgs), "Transient LLM error should not cause data loss"
+
+    def test_runtime_error_no_provider_preserves_messages(self):
+        """When no compression provider is configured (RuntimeError), messages
+        should be preserved rather than destroyed."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+
+        with patch("agent.context_compressor.call_llm", side_effect=RuntimeError("No provider")):
+            result = c.compress(msgs)
+
+        assert len(result) == len(msgs), "Missing provider should not cause data loss"
+
+    def test_summary_failure_preserves_messages_exactly_after_tool_pruning_would_run(self):
+        """On summary failure, compress() must return the exact original message
+        list — including any large tool outputs that _prune_old_tool_results
+        would normally replace with 1-line summaries.
+
+        This is the key regression test: the pre-summary pruning pass is lossy
+        (it replaces tool outputs with summaries), so returning the post-pruning
+        list on summary failure silently mutates the caller's conversation.
+        The fix saves original_messages before pruning and returns that instead.
+        """
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        # Unique 500-char tool output that pruning would replace with a summary
+        unique_tool_output = "UNIQUE_MARKER_" + ("x" * 486)
+        msgs = [
+            {"role": "user", "content": "msg 0"},
+            {"role": "assistant", "content": "msg 1"},
+            {"role": "user", "content": "msg 2"},
+            {"role": "assistant", "content": None, "tool_calls": [
+                {"id": "c1", "type": "function", "function": {"name": "terminal", "arguments": '{"command": "ls"}'}},
+            ]},
+            {"role": "tool", "content": unique_tool_output, "tool_call_id": "c1"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "assistant", "content": "msg 6"},
+            {"role": "user", "content": "msg 7"},
+            {"role": "assistant", "content": "msg 8"},
+            {"role": "user", "content": "msg 9"},
+        ]
+
+        with patch.object(c, "_generate_summary", return_value=None):
+            result = c.compress(msgs)
+
+        # Exact preservation — not just same length, same object
+        assert result == msgs, (
+            "Summary failure must return the exact original messages, "
+            "not the post-pruning version"
+        )
+        assert result is msgs, (
+            "Summary failure should return the same list object"
+        )
+
+
 class TestGenerateSummaryNoneContent:
     """Regression: content=None (from tool-call-only assistant messages) must not crash."""
 
@@ -128,7 +259,8 @@ class TestGenerateSummaryNoneContent:
             {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
             for i in range(10)
         ]
-        result = c.compress(msgs)
+        with patch.object(c, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
+            result = c.compress(msgs)
         assert len(result) < len(msgs)