fix(compression): decay protect_first_n so early turns don't fossilize (#11996)

protect_first_n keeps the first N non-system messages verbatim through compaction so the original task framing survives. But it was applied on EVERY compression pass: the same early user turns were re-copied into each child session and never summarized away, so across a long, repeatedly-compressed session those old messages became immortal and grew the protected head unboundedly (#11996, P1). Decay it: protect_first_n applies on the FIRST compaction only. Once the session has been compressed at least once (compression_count >= 1, or a handoff summary already exists), the early turns are captured in the summary, so _effective_protect_first_n() returns 0 and only the system prompt stays protected. The decay is read at compress_start computation time, before compression_count/_previous_summary are mutated at the end of compress(), so the first pass still protects correctly. Co-authored-by: truenorth-lj <liliangjya@gmail.com> Co-authored-by: davidvv <david.vv@icloud.com>
2026-06-23 10:42:00 +00:00 · 2026-06-20 23:39:08 -07:00 · 2026-06-20 23:39:08 -07:00 · 14ef6312b5
commit 14ef6312b5
parent c6bf6bda90
2 changed files with 57 additions and 2 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -1849,6 +1849,23 @@ This compaction should PRIORITISE preserving all information related to the focu
            idx += 1
        return idx

+    def _effective_protect_first_n(self) -> int:
+        """``protect_first_n`` decayed across compression cycles.
+
+        ``protect_first_n`` keeps the first N non-system messages verbatim so
+        the original task framing survives the FIRST compaction. But applying
+        it on every subsequent pass fossilizes those early turns — they're
+        re-copied into each child session and never summarized away, so old
+        user messages become immortal and grow the head unboundedly across a
+        long session (#11996). Once the session has been compressed at least
+        once, the early turns are already captured in the handoff summary, so
+        there's no need to keep re-protecting them: decay to 0 (the system
+        prompt is still always protected separately by _protect_head_size).
+        """
+        if self.compression_count >= 1 or self._previous_summary:
+            return 0
+        return self.protect_first_n
+
    def _protect_head_size(self, messages: List[Dict[str, Any]]) -> int:
        """Total count of head messages to protect.

@ -1860,14 +1877,19 @@ This compaction should PRIORITISE preserving all information related to the focu
        the ``messages`` list (e.g. the gateway ``/compress`` handler
        strips it before calling compress()).

-        Examples:
+        The ``protect_first_n`` portion DECAYS after the first compression
+        (see _effective_protect_first_n) so early user turns don't fossilize
+        across repeated compactions (#11996).
+
+        Examples (first compaction):
          protect_first_n=0 → system prompt only (or nothing if no system msg)
          protect_first_n=3 → system + first 3 non-system messages
+        After the first compaction: system prompt only.
        """
        head = 0
        if messages and messages[0].get("role") == "system":
            head = 1
-        return head + self.protect_first_n
+        return head + self._effective_protect_first_n()

    def _align_boundary_backward(self, messages: List[Dict[str, Any]], idx: int) -> int:
        """Pull a compress-end boundary backward to avoid splitting a
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@ -225,6 +225,39 @@ class TestCompress:
        # original content is present in either case.
        assert msgs[-2]["content"] in result[-2]["content"]

+    def test_protect_first_n_decays_after_first_compression(self):
+        """Regression for #11996: protect_first_n must protect early turns on
+        the FIRST compaction but DECAY afterwards, so the same early user
+        messages don't get re-copied verbatim into every child session and
+        fossilize (grow immortal) across a long, repeatedly-compressed
+        session. The system prompt is always protected separately."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3)
+
+        msgs = [{"role": "system", "content": "sys"}] + [
+            {"role": "user" if i % 2 == 0 else "assistant", "content": f"m{i}"}
+            for i in range(10)
+        ]
+
+        # First compaction: protect system + first 3 non-system.
+        assert c.compression_count == 0
+        assert c._effective_protect_first_n() == 3
+        assert c._protect_head_size(msgs) == 1 + 3
+
+        # Simulate having compressed once — early turns now live in the summary.
+        c.compression_count = 1
+        assert c._effective_protect_first_n() == 0
+        assert c._protect_head_size(msgs) == 1  # system prompt only
+
+    def test_protect_first_n_decays_when_previous_summary_exists(self):
+        """Even if compression_count was reset, an existing handoff summary
+        means the early turns are already captured — decay still applies."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3)
+        c.compression_count = 0
+        c._previous_summary = "[CONTEXT SUMMARY]: earlier work"
+        assert c._effective_protect_first_n() == 0
+

 class TestGenerateSummaryNoneContent:
    """Regression: content=None (from tool-call-only assistant messages) must not crash."""