fix(compressor): _prune_old_tool_results boundary direction

2026-05-07 02:51:50 +00:00 · 2026-04-26 22:50:40 +02:00 · 2026-04-26 22:50:40 +02:00 · b7bbc62503
commit b7bbc62503
parent d29f90e89d
2 changed files with 51 additions and 1 deletion
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -554,7 +554,16 @@ class ContextCompressor(ContextEngine):
                    break
                accumulated += msg_tokens
                boundary = i
-            prune_boundary = max(boundary, len(result) - min_protect)
+            # Translate the budget walk into a "protected count", apply the
+            # floor in count-space (where `max` reads naturally: protect at
+            # least `min_protect` messages or whatever the budget reserved,
+            # whichever is more), then convert back to a prune boundary.
+            # Doing this in index-space with `max` would invert the direction
+            # (smaller index = MORE protected), so a generous budget would
+            # silently get truncated back down to `min_protect`.
+            budget_protect_count = len(result) - boundary
+            protected_count = max(budget_protect_count, min_protect)
+            prune_boundary = len(result) - protected_count
        else:
            prune_boundary = len(result) - protect_tail_count

--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -1281,6 +1281,47 @@ class TestTokenBudgetTailProtection:
        assert isinstance(cut, int)
        assert 0 <= cut <= len(messages)

+    def test_generous_budget_protects_everything_floor_does_not_override(
+        self, budget_compressor
+    ):
+        """A budget that covers the whole transcript must prune nothing —
+        ``protect_tail_count`` is a minimum floor, not a ceiling."""
+        c = budget_compressor
+
+        # 100 alternating assistant/tool messages.  Each tool result has
+        # *unique* content so the dedup pass (Pass 1, which is independent
+        # of prune_boundary) is a no-op and we isolate the boundary logic.
+        messages = []
+        for i in range(50):
+            messages.append({
+                "role": "assistant", "content": None,
+                "tool_calls": [{
+                    "id": f"c{i}",
+                    "type": "function",
+                    "function": {"name": "noop", "arguments": "{}"},
+                }],
+            })
+            messages.append({
+                "role": "tool",
+                "tool_call_id": f"c{i}",
+                "content": f"unique-tool-output-{i:03d}-" + ("x" * 250),
+            })
+
+        # Budget large enough to cover the whole transcript many times over,
+        # so the budget walk completes without hitting its break condition
+        # and the boundary lands at 0 ("protect everything").
+        _, pruned = c._prune_old_tool_results(
+            messages,
+            protect_tail_count=20,
+            protect_tail_tokens=10_000_000,
+        )
+
+        assert pruned == 0, (
+            "budget said protect everything, but the floor still pruned "
+            f"{pruned} messages — protect_tail_count is acting as a ceiling, "
+            "not a minimum floor"
+        )
+

 class TestUpdateModelBudgets:
    """Regression: update_model() must recalculate token budgets."""