fix(compressor): _prune_old_tool_results boundary direction

This commit is contained in:
swithek 2026-04-26 22:50:40 +02:00 committed by Teknium
parent d29f90e89d
commit b7bbc62503
2 changed files with 51 additions and 1 deletion

View file

@ -554,7 +554,16 @@ class ContextCompressor(ContextEngine):
break break
accumulated += msg_tokens accumulated += msg_tokens
boundary = i boundary = i
prune_boundary = max(boundary, len(result) - min_protect) # Translate the budget walk into a "protected count", apply the
# floor in count-space (where `max` reads naturally: protect at
# least `min_protect` messages or whatever the budget reserved,
# whichever is more), then convert back to a prune boundary.
# Doing this in index-space with `max` would invert the direction
# (smaller index = MORE protected), so a generous budget would
# silently get truncated back down to `min_protect`.
budget_protect_count = len(result) - boundary
protected_count = max(budget_protect_count, min_protect)
prune_boundary = len(result) - protected_count
else: else:
prune_boundary = len(result) - protect_tail_count prune_boundary = len(result) - protect_tail_count

View file

@ -1281,6 +1281,47 @@ class TestTokenBudgetTailProtection:
assert isinstance(cut, int) assert isinstance(cut, int)
assert 0 <= cut <= len(messages) assert 0 <= cut <= len(messages)
def test_generous_budget_protects_everything_floor_does_not_override(
    self, budget_compressor
):
    """Regression: an oversized token budget must leave every message alone.

    ``protect_tail_count`` is meant to be a lower bound on how many tail
    messages are protected. When ``protect_tail_tokens`` is big enough to
    cover the entire transcript, the pruner has to report zero pruned
    messages instead of falling back to protecting only
    ``protect_tail_count`` of them.
    """

    def _exchange(idx):
        # One assistant tool call plus its matching tool reply. The reply
        # text embeds the index so no two tool outputs are identical; per
        # the original test's notes this keeps the content-dedup pass from
        # touching anything, so only the boundary logic is exercised.
        call_id = f"c{idx}"
        request = {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": call_id,
                    "type": "function",
                    "function": {"name": "noop", "arguments": "{}"},
                },
            ],
        }
        reply = {
            "role": "tool",
            "tool_call_id": call_id,
            "content": f"unique-tool-output-{idx:03d}-" + ("x" * 250),
        }
        return request, reply

    # 50 exchanges -> 100 messages, strictly alternating assistant/tool.
    transcript = [msg for idx in range(50) for msg in _exchange(idx)]

    # The token budget dwarfs the transcript, so the budget is expected to
    # reserve every message; the 20-message floor must not shrink that.
    outcome = budget_compressor._prune_old_tool_results(
        transcript,
        protect_tail_count=20,
        protect_tail_tokens=10_000_000,
    )
    _, pruned = outcome

    assert pruned == 0, (
        "budget said protect everything, but the floor still pruned "
        f"{pruned} messages — protect_tail_count is acting as a ceiling, "
        "not a minimum floor"
    )
class TestUpdateModelBudgets: class TestUpdateModelBudgets:
"""Regression: update_model() must recalculate token budgets.""" """Regression: update_model() must recalculate token budgets."""