From b18490b89022a15954e85a2bda33d20e2b0cfe0f Mon Sep 17 00:00:00 2001 From: islam666 Date: Sun, 7 Jun 2026 05:26:59 +0000 Subject: [PATCH] fix(compaction): prevent infinite loop when transcript fits in tail budget When summary_target_ratio is large (e.g. 0.45) and the context_length is moderate (e.g. 96000), the soft_ceiling (token_budget * 1.5) can exceed the total transcript size. _find_tail_cut_by_tokens walks the entire transcript without breaking early, and the resulting compress window is either empty (compress_start >= compress_end) or a single message whose summary-of-one overhead saves ~0 tokens. Both outcomes cause a no-op compression that does not increment _ineffective_compression_count, so should_compress() returns True on every subsequent turn and the loop repeats endlessly. Fix (two layers): 1. _find_tail_cut_by_tokens: when the backward walk consumed the entire transcript without breaking (cut_idx <= head_end and accumulated <= soft_ceiling), re-walk with the raw (non-inflated) token budget to find a meaningful cut that gives the summarizer a useful middle window. 2. compress(): when compress_start >= compress_end, increment _ineffective_compression_count and log a warning so the existing anti-thrashing guard in should_compress() can break the loop. Fixes #40803 --- agent/context_compressor.py | 50 ++++ .../test_infinite_compaction_loop.py | 250 ++++++++++++++++++ 2 files changed, 300 insertions(+) create mode 100644 tests/run_agent/test_infinite_compaction_loop.py diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 71c7944c772..8b6c932d0c6 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -1818,6 +1818,41 @@ The user has requested that this compaction PRIORITISE preserving all informatio accumulated += msg_tokens cut_idx = i + # If the backward walk never broke early because the entire transcript + # fits within soft_ceiling, accumulated now holds the total transcript + # size. Without intervention _ensure_last_user_message_in_tail pushes + # cut_idx forward to include the last user message, and the caller's + # compress_start >= compress_end guard either returns unchanged (no-op) + # or compresses a single message — both of which trigger the infinite + # compaction loop described in #40803. + # + # Fix: when the whole transcript fits in soft_ceiling, compute a + # meaningful cut point using the raw (non-inflated) budget so that + # compression actually summarizes a worthwhile middle section. + if cut_idx <= head_end and accumulated <= soft_ceiling and accumulated > 0: + # The entire compressable region fits in the soft ceiling. + # Re-walk with the raw budget (no 1.5x multiplier) to find a + # split that gives the summarizer something useful. + raw_budget = token_budget + raw_accumulated = 0 + for j in range(n - 1, head_end - 1, -1): + raw_msg = messages[j] + raw_content = raw_msg.get("content") or "" + raw_len = _content_length_for_budget(raw_content) + raw_tok = raw_len // _CHARS_PER_TOKEN + 10 + for tc in raw_msg.get("tool_calls") or []: + if isinstance(tc, dict): + args = tc.get("function", {}).get("arguments", "") + raw_tok += len(args) // _CHARS_PER_TOKEN + if raw_accumulated + raw_tok > raw_budget and (n - j) >= min_tail: + cut_idx = j + break + raw_accumulated += raw_tok + cut_idx = j + # If the raw-budget walk also consumed everything (very small + # transcript), fall through — the existing fallback logic below + # will still force a minimal cut after head_end. + # Ensure we protect at least min_tail messages fallback_cut = n - min_tail cut_idx = min(cut_idx, fallback_cut) @@ -1920,6 +1955,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio compress_end = self._find_tail_cut_by_tokens(messages, compress_start) if compress_start >= compress_end: + # No compressable window — the entire transcript fits within + # the tail budget (soft_ceiling). Without recording this as + # an ineffective compression the anti-thrashing guard in + # should_compress() never fires and every subsequent turn + # re-triggers a no-op compression loop. (#40803) + self._ineffective_compression_count += 1 + self._last_compression_savings_pct = 0.0 + if not self.quiet_mode: + logger.warning( + "Compression skipped: compress_start (%d) >= compress_end (%d) " + "— transcript fits within tail budget, nothing to compress. " + "ineffective_compression_count=%d", + compress_start, compress_end, + self._ineffective_compression_count, + ) return messages turns_to_summarize = messages[compress_start:compress_end] diff --git a/tests/run_agent/test_infinite_compaction_loop.py b/tests/run_agent/test_infinite_compaction_loop.py new file mode 100644 index 00000000000..930df3381cc --- /dev/null +++ b/tests/run_agent/test_infinite_compaction_loop.py @@ -0,0 +1,250 @@ +"""Tests for the infinite compaction loop fix (issue #40803). + +When summary_target_ratio is large enough that the entire transcript fits +within soft_ceiling, the backward walk in _find_tail_cut_by_tokens never +breaks early. Without the fix this produces either a no-op compression +(compress_start >= compress_end) or a single-message compression whose +summary-of-one overhead saves 0 tokens — both of which cause the +compressor to fire on every subsequent turn with no progress. + +The fix adds two safeguards: +1. _find_tail_cut_by_tokens: when the whole transcript fits in soft_ceiling, + re-walk with the raw (non-inflated) budget to find a meaningful cut. +2. compress(): when compress_start >= compress_end, record the no-op as + an ineffective compression so should_compress() anti-thrashing fires. +""" + +from unittest.mock import patch, MagicMock + +from agent.context_compressor import ContextCompressor, _CHARS_PER_TOKEN + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_compressor(**kwargs) -> ContextCompressor: + defaults = dict( + model="test-model", + threshold_percent=0.65, + protect_first_n=2, + protect_last_n=3, + quiet_mode=True, + ) + defaults.update(kwargs) + with patch("agent.context_compressor.get_model_context_length", return_value=96000): + return ContextCompressor(**defaults) + + +def _build_session(n_turns: int, words_per_turn: int = 20) -> list: + """Build a multi-turn conversation with a system prompt.""" + base_text = " ".join(["a"] * words_per_turn) + messages = [{"role": "system", "content": "You are a helpful agent."}] + for i in range(n_turns): + messages.append({"role": "user", "content": f"{base_text} (user turn {i})"}) + messages.append({"role": "assistant", "content": f"{base_text} (assistant turn {i})"}) + return messages + + +# --------------------------------------------------------------------------- +# Test: compress_start >= compress_end registers as ineffective +# --------------------------------------------------------------------------- + +class TestCompressNoOpRegistersIneffective: + """When compress_start >= compress_end, the fix records this as + an ineffective compression so the anti-thrashing guard fires. + + We trigger this path by having _find_tail_cut_by_tokens return + head_end (which makes compress_end = head_end + 1, same as + compress_start after alignment).""" + + def test_no_op_increments_counter(self): + """compress_start >= compress_end -> _ineffective_compression_count += 1""" + comp = _make_compressor( + summary_target_ratio=0.45, + config_context_length=96000, + ) + # A large session that passes the min_for_compress check + messages = _build_session(10, words_per_turn=10) + comp.last_prompt_tokens = 65_000 + + # Mock _find_tail_cut_by_tokens to return head_end, + # causing compress_start >= compress_end + original = comp._find_tail_cut_by_tokens + comp._find_tail_cut_by_tokens = lambda msgs, he: he # force no-op + + result = comp.compress(messages, current_tokens=65_000) + + assert comp._ineffective_compression_count >= 1, ( + f"Expected ineffective_compression_count >= 1, got {comp._ineffective_compression_count}" + ) + + def test_no_op_sets_savings_to_zero(self): + """compress_start >= compress_end -> _last_compression_savings_pct = 0""" + comp = _make_compressor( + summary_target_ratio=0.45, + config_context_length=96000, + ) + messages = _build_session(10, words_per_turn=10) + comp.last_prompt_tokens = 65_000 + comp._find_tail_cut_by_tokens = lambda msgs, he: he # force no-op + + comp.compress(messages, current_tokens=65_000) + + assert comp._last_compression_savings_pct == 0.0 + + def test_two_no_ops_block_should_compress(self): + """After 2 no-op compressions, should_compress returns False.""" + comp = _make_compressor( + summary_target_ratio=0.45, + config_context_length=96000, + ) + messages = _build_session(10, words_per_turn=10) + comp.last_prompt_tokens = 65_000 + comp._find_tail_cut_by_tokens = lambda msgs, he: he # force no-op + + comp.compress(messages, current_tokens=65_000) + comp.compress(messages, current_tokens=65_000) + + assert comp._ineffective_compression_count >= 2 + assert not comp.should_compress(65_000), ( + "should_compress should return False after 2+ ineffective compressions" + ) + + def test_no_op_returns_unchanged_messages(self): + """compress_start >= compress_end -> messages returned unchanged""" + comp = _make_compressor( + summary_target_ratio=0.45, + config_context_length=96000, + ) + messages = _build_session(10, words_per_turn=10) + comp.last_prompt_tokens = 65_000 + original_cut = comp._find_tail_cut_by_tokens + comp._find_tail_cut_by_tokens = lambda msgs, he: he # force no-op + + result = comp.compress(messages, current_tokens=65_000) + + assert len(result) == len(messages), ( + f"Expected unchanged message count {len(messages)}, got {len(result)}" + ) + comp._find_tail_cut_by_tokens = original_cut + + +# --------------------------------------------------------------------------- +# Test: _find_tail_cut_by_tokens raw-budget fallback +# --------------------------------------------------------------------------- + +class TestTailCutRawBudgetFallback: + """When the entire transcript fits within soft_ceiling, the fix + re-walks with the raw budget to find a meaningful cut point.""" + + def test_meaningful_cut_with_large_ratio(self): + """With summary_target_ratio=0.45, _find_tail_cut_by_tokens still + leaves a meaningful compressable region.""" + comp = _make_compressor( + summary_target_ratio=0.45, + config_context_length=96000, + ) + messages = _build_session(20, words_per_turn=20) + head_end = comp._protect_head_size(messages) + head_end = comp._align_boundary_forward(messages, head_end) + + cut = comp._find_tail_cut_by_tokens(messages, head_end) + + n = len(messages) + middle_size = cut - head_end + assert middle_size >= 3, ( + f"Expected at least 3 messages in compressable region, got {middle_size} " + f"(cut={cut}, head_end={head_end}, n={n})" + ) + + def test_default_ratio_still_works(self): + """Default ratio (0.20) should not be affected by the fix.""" + comp = _make_compressor( + summary_target_ratio=0.20, + config_context_length=96000, + ) + messages = _build_session(20, words_per_turn=50) + head_end = comp._protect_head_size(messages) + head_end = comp._align_boundary_forward(messages, head_end) + + cut = comp._find_tail_cut_by_tokens(messages, head_end) + + n = len(messages) + assert head_end < cut < n, ( + f"Expected head_end ({head_end}) < cut ({cut}) < n ({n})" + ) + + def test_proactive_fix_prevents_no_op_window(self): + """The raw-budget fallback in _find_tail_cut_by_tokens should prevent + compress_start >= compress_end for the exact issue scenario: + context_length=96000, summary_target_ratio=0.45.""" + comp = _make_compressor( + summary_target_ratio=0.45, + config_context_length=96000, + ) + # Simulate the issue scenario: 16 messages, all fitting in soft_ceiling + messages = _build_session(8, words_per_turn=30) # 17 messages + head_end = comp._protect_head_size(messages) + head_end = comp._align_boundary_forward(messages, head_end) + + cut = comp._find_tail_cut_by_tokens(messages, head_end) + + # With the fix, cut should be well past head_end + assert cut > head_end + 1, ( + f"Expected cut ({cut}) > head_end ({head_end}) + 1, " + f"meaning the compressable window is non-trivial" + ) + + +# --------------------------------------------------------------------------- +# Test: Effective compression resets counter +# --------------------------------------------------------------------------- + +class TestEffectiveCompressionResetsCounter: + """When compression actually saves tokens, the ineffective counter resets.""" + + def test_effective_compression_resets_counter(self): + """After an effective compression, _ineffective_compression_count = 0.""" + comp = _make_compressor( + summary_target_ratio=0.20, + config_context_length=96000, + ) + messages = _build_session(30, words_per_turn=100) + comp._generate_summary = MagicMock(return_value="Compacted summary of earlier turns.") + comp.last_prompt_tokens = 65_000 + + comp.compress(messages, current_tokens=65_000) + + assert comp._ineffective_compression_count == 0, ( + f"Expected 0 ineffective compressions with effective compression, " + f"got {comp._ineffective_compression_count}" + ) + + +# --------------------------------------------------------------------------- +# Test: anti-thrashing in should_compress +# --------------------------------------------------------------------------- + +class TestAntiThrashing: + """Directly test the should_compress anti-thrashing guard.""" + + def test_ineffective_count_2_blocks(self): + """_ineffective_compression_count >= 2 -> should_compress returns False.""" + comp = _make_compressor(config_context_length=96000) + comp.last_prompt_tokens = 65_000 + comp._ineffective_compression_count = 2 + assert not comp.should_compress(65_000) + + def test_ineffective_count_1_allows(self): + """_ineffective_compression_count = 1 -> should_compress still True.""" + comp = _make_compressor(config_context_length=96000) + comp.last_prompt_tokens = 65_000 + comp._ineffective_compression_count = 1 + assert comp.should_compress(65_000) + + def test_below_threshold_allows(self): + """Tokens below threshold -> should_compress returns False regardless.""" + comp = _make_compressor(config_context_length=96000) + comp.last_prompt_tokens = 10_000 + assert not comp.should_compress(10_000)