fix(compression): decay protect_first_n so early turns don't fossilize (#11996)

protect_first_n keeps the first N non-system messages verbatim through
compaction so the original task framing survives. But it was applied on
EVERY compression pass: the same early user turns were re-copied into each
child session and never summarized away, so across a long, repeatedly-
compressed session those old messages became immortal and grew the
protected head unboundedly (#11996, P1).

Decay it: protect_first_n applies on the FIRST compaction only. Once the
session has been compressed at least once (compression_count >= 1, or a
handoff summary already exists), the early turns are captured in the
summary, so _effective_protect_first_n() returns 0 and only the system
prompt stays protected. The decay is read at compress_start computation
time, before compression_count/_previous_summary are mutated at the end of
compress(), so the first pass still protects correctly.

Co-authored-by: truenorth-lj <liliangjya@gmail.com>
Co-authored-by: davidvv <david.vv@icloud.com>
This commit is contained in:
teknium1 2026-06-20 23:39:08 -07:00 committed by Teknium
parent c6bf6bda90
commit 14ef6312b5
2 changed files with 57 additions and 2 deletions

View file

@ -1849,6 +1849,23 @@ This compaction should PRIORITISE preserving all information related to the focu
idx += 1
return idx
def _effective_protect_first_n(self) -> int:
"""``protect_first_n`` decayed across compression cycles.
``protect_first_n`` keeps the first N non-system messages verbatim so
the original task framing survives the FIRST compaction. But applying
it on every subsequent pass fossilizes those early turns they're
re-copied into each child session and never summarized away, so old
user messages become immortal and grow the head unboundedly across a
long session (#11996). Once the session has been compressed at least
once, the early turns are already captured in the handoff summary, so
there's no need to keep re-protecting them: decay to 0 (the system
prompt is still always protected separately by _protect_head_size).
"""
if self.compression_count >= 1 or self._previous_summary:
return 0
return self.protect_first_n
def _protect_head_size(self, messages: List[Dict[str, Any]]) -> int:
"""Total count of head messages to protect.
@ -1860,14 +1877,19 @@ This compaction should PRIORITISE preserving all information related to the focu
the ``messages`` list (e.g. the gateway ``/compress`` handler
strips it before calling compress()).
Examples:
The ``protect_first_n`` portion DECAYS after the first compression
(see _effective_protect_first_n) so early user turns don't fossilize
across repeated compactions (#11996).
Examples (first compaction):
protect_first_n=0 -> system prompt only (or nothing if no system msg)
protect_first_n=3 -> system + first 3 non-system messages
After the first compaction: system prompt only.
"""
head = 0
if messages and messages[0].get("role") == "system":
head = 1
return head + self.protect_first_n
return head + self._effective_protect_first_n()
def _align_boundary_backward(self, messages: List[Dict[str, Any]], idx: int) -> int:
"""Pull a compress-end boundary backward to avoid splitting a

View file

@ -225,6 +225,39 @@ class TestCompress:
# original content is present in either case.
assert msgs[-2]["content"] in result[-2]["content"]
def test_protect_first_n_decays_after_first_compression(self):
    """Regression for #11996: head protection applies once, then decays.

    ``protect_first_n`` must protect the early turns on the FIRST
    compaction but decay afterwards, so the same early user messages are
    not re-copied verbatim into every child session and fossilized across
    a long, repeatedly-compressed session. The system prompt is always
    protected separately.
    """
    # NOTE(review): the patch is scoped to construction only; neither
    # method exercised below calls get_model_context_length in the code
    # visible here - confirm against the original indentation.
    with patch("agent.context_compressor.get_model_context_length", return_value=100000):
        compressor = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3)

    # One system message followed by ten alternating user/assistant turns.
    history = [{"role": "system", "content": "sys"}]
    for i in range(10):
        speaker = "assistant" if i % 2 else "user"
        history.append({"role": speaker, "content": f"m{i}"})

    # First compaction: protect system + first 3 non-system messages.
    assert compressor.compression_count == 0
    assert compressor._effective_protect_first_n() == 3
    assert compressor._protect_head_size(history) == 1 + 3

    # Simulate one completed compaction: the early turns now live in the
    # summary, so only the system prompt remains protected.
    compressor.compression_count = 1
    assert compressor._effective_protect_first_n() == 0
    assert compressor._protect_head_size(history) == 1
def test_protect_first_n_decays_when_previous_summary_exists(self):
    """A pre-existing handoff summary alone is enough to trigger the decay.

    Even if ``compression_count`` was reset to 0, an existing summary
    means the early turns are already captured, so the decay still
    applies.
    """
    # NOTE(review): the patch is scoped to construction only - confirm
    # against the original indentation.
    with patch("agent.context_compressor.get_model_context_length", return_value=100000):
        compressor = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3)

    # Counter says "never compressed", but a summary is already on hand.
    compressor.compression_count = 0
    compressor._previous_summary = "[CONTEXT SUMMARY]: earlier work"

    assert compressor._effective_protect_first_n() == 0
class TestGenerateSummaryNoneContent:
"""Regression: content=None (from tool-call-only assistant messages) must not crash."""