From 7039020c9fbe84114c67610b9f54df5af421034e Mon Sep 17 00:00:00 2001
From: Tranquil-Flow
Date: Thu, 23 Apr 2026 20:27:17 +1000
Subject: [PATCH] fix(agent): preserve messages when compression summary
 generation fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When _generate_summary() fails (timeout, rate limit, no provider, HTTP
529), compress() previously destroyed all middle conversation turns and
replaced them with a static placeholder string. This caused permanent,
unrecoverable data loss — the model lost all context of earlier work.

The fix is simple: when summary generation returns None, return the
original messages unchanged instead of proceeding with the destruction.
The conversation stays intact and compression can be retried on the next
trigger.

Fixes #10719, #12131, #11914, #11585
---
 agent/context_compressor.py            |  34 +++---
 tests/agent/test_context_compressor.py | 144 +++++++++++++++++++++++--
 2 files changed, 158 insertions(+), 20 deletions(-)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index f8036851fc..345fd81f93 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -817,8 +817,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             # No provider configured — long cooldown, unlikely to self-resolve
             self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
             logging.warning("Context compression: no provider available for "
-                            "summary. Middle turns will be dropped without summary "
-                            "for %d seconds.",
+                            "summary. Preserving original messages and pausing "
+                            "summary attempts for %d seconds.",
                             _SUMMARY_FAILURE_COOLDOWN_SECONDS)
             return None
         except Exception as e:
@@ -1135,6 +1135,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
 
         display_tokens = current_tokens if current_tokens else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
 
+        # Snapshot the original messages before any lossy modifications.
+        # _prune_old_tool_results() returns a NEW list (via [m.copy() for m in messages])
+        # so rebinding 'messages' below does not affect original_messages.
+        # If summary generation fails later, we return this exact list so
+        # the caller's conversation history is completely unmodified.
+        original_messages = messages
+
         # Phase 1: Prune old tool results (cheap, no LLM call)
         messages, pruned_count = self._prune_old_tool_results(
             messages, protect_tail_count=self.protect_last_n,
@@ -1151,7 +1158,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
         compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
 
         if compress_start >= compress_end:
-            return messages
+            return original_messages
 
         turns_to_summarize = messages[compress_start:compress_end]
 
@@ -1194,19 +1201,18 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             )
             compressed.append(msg)
 
-        # If LLM summary failed, insert a static fallback so the model
-        # knows context was lost rather than silently dropping everything.
+        # If LLM summary failed, preserve the original messages rather than
+        # destroying conversation history. Dropping turns without a summary
+        # causes permanent data loss — the model loses context of earlier work
+        # with no way to recover it. (#10719, #12131, #11914, #11585)
         if not summary:
             if not self.quiet_mode:
-                logger.warning("Summary generation failed — inserting static fallback context marker")
-            n_dropped = compress_end - compress_start
-            summary = (
-                f"{SUMMARY_PREFIX}\n"
-                f"Summary generation was unavailable. {n_dropped} conversation turns were "
-                f"removed to free context space but could not be summarized. The removed "
-                f"turns contained earlier work in this session. Continue based on the "
-                f"recent messages below and the current state of any files or resources."
-            )
+                logger.warning(
+                    "Summary generation failed — preserving original messages "
+                    "to prevent data loss (%d middle turns kept intact)",
+                    compress_end - compress_start,
+                )
+            return original_messages
 
         _merge_summary_into_tail = False
         last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user"
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 8072a58d98..41b024a9b2 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -65,9 +65,10 @@ class TestCompress:
         assert result == msgs
 
     def test_truncation_fallback_no_client(self, compressor):
-        # compressor has client=None, so should use truncation fallback
+        # With a mocked summary, compression should reduce message count
         msgs = [{"role": "system", "content": "System prompt"}] + self._make_messages(10)
-        result = compressor.compress(msgs)
+        with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
+            result = compressor.compress(msgs)
         assert len(result) < len(msgs)
         # Should keep system message and last N
         assert result[0]["role"] == "system"
@@ -75,14 +76,17 @@
     def test_compression_increments_count(self, compressor):
         msgs = self._make_messages(10)
-        compressor.compress(msgs)
+        with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
+            compressor.compress(msgs)
         assert compressor.compression_count == 1
-        compressor.compress(msgs)
+        with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary 2"):
+            compressor.compress(msgs)
         assert compressor.compression_count == 2
 
     def test_protects_first_and_last(self, compressor):
         msgs = self._make_messages(10)
-        result = compressor.compress(msgs)
+        with patch.object(compressor, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
+            result = compressor.compress(msgs)
         # First 2 messages should be preserved (protect_first_n=2)
         # Last 2 messages should be preserved (protect_last_n=2)
         assert result[-1]["content"] == msgs[-1]["content"]
@@ -93,6 +97,133 @@ class TestCompress:
         assert msgs[-2]["content"] in result[-2]["content"]
 
 
+class TestPreserveOnSummaryFailure:
+    """Regression: when summary generation fails, compress() must return messages
+    unchanged instead of destroying middle turns. (#10719, #12131, #11914, #11585)"""
+
+    def test_summary_failure_preserves_all_messages(self):
+        """When _generate_summary returns None, no messages should be dropped."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+
+        with patch.object(c, "_generate_summary", return_value=None):
+            result = c.compress(msgs)
+
+        assert len(result) == len(msgs), (
+            f"Messages were dropped despite summary failure: {len(msgs)} -> {len(result)}"
+        )
+
+    def test_summary_failure_preserves_content(self):
+        """Every original message should be present after a failed compression."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+        original_contents = [m["content"] for m in msgs]
+
+        with patch.object(c, "_generate_summary", return_value=None):
+            result = c.compress(msgs)
+
+        result_contents = [m["content"] for m in result]
+        for content in original_contents:
+            assert content in result_contents, f"Message '{content}' was lost"
+
+    def test_summary_failure_does_not_increment_compression_count(self):
+        """A failed compression should not count as a successful one."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+
+        with patch.object(c, "_generate_summary", return_value=None):
+            c.compress(msgs)
+
+        assert c.compression_count == 0
+
+    def test_successful_summary_still_compresses(self):
+        """Verify the normal path still works — compression happens when summary succeeds."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+
+        with patch.object(c, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary of work"):
+            result = c.compress(msgs)
+
+        assert len(result) < len(msgs)
+        assert c.compression_count == 1
+
+    def test_transient_llm_error_preserves_messages(self):
+        """Simulate a real transient error (timeout/rate limit) through the full
+        _generate_summary path — messages should be preserved."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+
+        with patch("agent.context_compressor.call_llm", side_effect=Exception("Connection timeout")):
+            result = c.compress(msgs)
+
+        assert len(result) == len(msgs), "Transient LLM error should not cause data loss"
+
+    def test_runtime_error_no_provider_preserves_messages(self):
+        """When no compression provider is configured (RuntimeError), messages
+        should be preserved rather than destroyed."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
+
+        with patch("agent.context_compressor.call_llm", side_effect=RuntimeError("No provider")):
+            result = c.compress(msgs)
+
+        assert len(result) == len(msgs), "Missing provider should not cause data loss"
+
+    def test_summary_failure_preserves_messages_exactly_after_tool_pruning_would_run(self):
+        """On summary failure, compress() must return the exact original message
+        list — including any large tool outputs that _prune_old_tool_results
+        would normally replace with 1-line summaries.
+
+        This is the key regression test: the pre-summary pruning pass is lossy
+        (it replaces tool outputs with summaries), so returning the post-pruning
+        list on summary failure silently mutates the caller's conversation.
+        The fix saves original_messages before pruning and returns that instead.
+        """
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        # Unique 500-char tool output that pruning would replace with a summary
+        unique_tool_output = "UNIQUE_MARKER_" + ("x" * 486)
+        msgs = [
+            {"role": "user", "content": "msg 0"},
+            {"role": "assistant", "content": "msg 1"},
+            {"role": "user", "content": "msg 2"},
+            {"role": "assistant", "content": None, "tool_calls": [
+                {"id": "c1", "type": "function", "function": {"name": "terminal", "arguments": '{"command": "ls"}'}},
+            ]},
+            {"role": "tool", "content": unique_tool_output, "tool_call_id": "c1"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "assistant", "content": "msg 6"},
+            {"role": "user", "content": "msg 7"},
+            {"role": "assistant", "content": "msg 8"},
+            {"role": "user", "content": "msg 9"},
+        ]
+
+        with patch.object(c, "_generate_summary", return_value=None):
+            result = c.compress(msgs)
+
+        # Exact preservation — not just same length, same object
+        assert result == msgs, (
+            "Summary failure must return the exact original messages, "
+            "not the post-pruning version"
+        )
+        assert result is msgs, (
+            "Summary failure should return the same list object"
+        )
+
+
 class TestGenerateSummaryNoneContent:
     """Regression: content=None (from tool-call-only assistant messages) must not crash."""
 
@@ -128,7 +259,8 @@ class TestGenerateSummaryNoneContent:
             {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
             for i in range(10)
         ]
-        result = c.compress(msgs)
+        with patch.object(c, "_generate_summary", return_value=f"{SUMMARY_PREFIX}\nSummary"):
+            result = c.compress(msgs)
         assert len(result) < len(msgs)