fix(compressor): _prune_old_tool_results boundary direction

2026-05-06 02:41:48 +00:00 · 2026-04-26 22:50:40 +02:00 · 2026-04-26 22:50:40 +02:00 · b7bbc62503
commit b7bbc62503
parent d29f90e89d
2 changed files with 51 additions and 1 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -554,7 +554,16 @@ class ContextCompressor(ContextEngine):
                    break
                accumulated += msg_tokens
                boundary = i
-            prune_boundary = max(boundary, len(result) - min_protect)
+            # Translate the budget walk into a "protected count", apply the
            # floor in count-space (where `max` reads naturally: protect at
            # least `min_protect` messages or whatever the budget reserved,
            # whichever is more), then convert back to a prune boundary.
            # Doing this in index-space with `max` would invert the direction
            # (smaller index = MORE protected), so a generous budget would
            # silently get truncated back down to `min_protect`.
            budget_protect_count = len(result) - boundary
            protected_count = max(budget_protect_count, min_protect)
            prune_boundary = len(result) - protected_count
        else:
            prune_boundary = len(result) - protect_tail_count
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@ -1281,6 +1281,47 @@ class TestTokenBudgetTailProtection:
        assert isinstance(cut, int)
        assert 0 <= cut <= len(messages)
    def test_generous_budget_protects_everything_floor_does_not_override(
        self, budget_compressor
    ):
        """A budget that covers the whole transcript must prune nothing —
        ``protect_tail_count`` is a minimum floor, not a ceiling."""
        c = budget_compressor
        # 100 alternating assistant/tool messages.  Each tool result has
        # *unique* content so the dedup pass (Pass 1, which is independent
        # of prune_boundary) is a no-op and we isolate the boundary logic.
        messages = []
        for i in range(50):
            messages.append({
                "role": "assistant", "content": None,
                "tool_calls": [{
                    "id": f"c{i}",
                    "type": "function",
                    "function": {"name": "noop", "arguments": "{}"},
                }],
            })
            messages.append({
                "role": "tool",
                "tool_call_id": f"c{i}",
                "content": f"unique-tool-output-{i:03d}-" + ("x" * 250),
            })
        # Budget large enough to cover the whole transcript many times over,
        # so the budget walk completes without hitting its break condition
        # and the boundary lands at 0 ("protect everything").
        _, pruned = c._prune_old_tool_results(
            messages,
            protect_tail_count=20,
            protect_tail_tokens=10_000_000,
        )
        assert pruned == 0, (
            "budget said protect everything, but the floor still pruned "
            f"{pruned} messages — protect_tail_count is acting as a ceiling, "
            "not a minimum floor"
        )
 class TestUpdateModelBudgets:
    """Regression: update_model() must recalculate token budgets."""