fix(compression): don't overwrite the -1 post-compression sentinel in preflight seed (#36718)

compress_context() sets last_prompt_tokens=-1 right after compression to
mark "no real API usage yet". The preflight display-seed used
`_preflight_tokens > (last_prompt_tokens or 0)`, and `(-1 or 0)` is -1
(truthy), so any positive rough estimate clobbered the sentinel with a
schema-inflated count — re-triggering compression on the next turn.
Treat any negative value as "no real data yet" and skip the seed.

Salvaged from #40246 as the minimal root-cause fix. The original also
added an `_awaiting_suppression_count` bounded-window state machine to
should_compress() across 3 files; left out here to keep blast radius
small — the sentinel guard alone fixes the re-fire. The suppression
window can be added separately if the usage=None-stub edge case warrants it.

Co-authored-by: davidgut1982 <davidgut1982@users.noreply.github.com>
This commit is contained in:
Teknium 2026-06-06 08:21:42 -07:00
parent 3763355f08
commit 3c8f1dee8d
2 changed files with 44 additions and 1 deletions

View file

@ -641,7 +641,14 @@ def run_conversation(
# Skipped when deferring — a deferred estimate is known to over-count
# vs the last real provider prompt, so trusting it for the display
# would re-introduce the very desync we're avoiding.
if _preflight_tokens > (_compressor.last_prompt_tokens or 0):
_last = _compressor.last_prompt_tokens
# Do NOT overwrite the -1 sentinel. compress_context() sets
# last_prompt_tokens=-1 right after compression to mark "no real API
# usage yet". `(x or 0)` evaluates to -1 (truthy) for the sentinel,
# so the old comparison was True for any non-negative estimate and
# clobbered the sentinel with a schema-inflated rough estimate —
# re-triggering compression on the next turn (#36718). Treat any
# negative value as "no data".
if _last >= 0 and _preflight_tokens > _last:
_compressor.last_prompt_tokens = _preflight_tokens
if _preflight_deferred:

View file

@ -2147,3 +2147,39 @@ class TestTruncateToolCallArgsJson:
parsed = _json.loads(shrunk)
assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
assert parsed["content"].endswith("...[truncated]")
class TestPreflightSentinelGuard:
    """Regression tests for #36718: preflight seed vs. the -1 sentinel.

    The preflight token-display seed in run_conversation must NOT overwrite
    the -1 sentinel that compress_context() sets immediately after
    compression.

    The old guard `_preflight_tokens > (last_prompt_tokens or 0)` evaluated
    `(-1 or 0)` -> -1 (truthy), so any positive preflight estimate was > -1
    and clobbered the sentinel with a schema-inflated rough count, re-firing
    compression on the next turn. The fix treats any negative value as
    "no real usage yet" and skips the seed.

    NOTE: these tests exercise `_seed`, a local copy of the guard, rather
    than run_conversation itself, so `_seed` has to be kept in sync with the
    production condition by hand.
    """

    def _seed(self, last_prompt_tokens, preflight_tokens):
        """Return the value last_prompt_tokens would hold after the seed step.

        Mirrors the exact guard in agent/conversation_loop.py
        run_conversation: the estimate replaces the stored value only when
        the stored value is non-negative AND the estimate is strictly larger.
        """
        _last = last_prompt_tokens
        if _last >= 0 and preflight_tokens > _last:
            return preflight_tokens  # would overwrite
        return last_prompt_tokens  # preserved

    def test_sentinel_preserved_after_compression(self, compressor):
        compressor.last_prompt_tokens = -1
        # A large schema-inflated preflight estimate must NOT overwrite -1.
        result = self._seed(compressor.last_prompt_tokens, 250_000)
        assert result == -1

    def test_real_value_still_revises_upward(self, compressor):
        compressor.last_prompt_tokens = 10_000
        result = self._seed(compressor.last_prompt_tokens, 50_000)
        assert result == 50_000

    def test_real_value_not_revised_downward(self, compressor):
        compressor.last_prompt_tokens = 50_000
        result = self._seed(compressor.last_prompt_tokens, 10_000)
        assert result == 50_000

    def test_zero_baseline_is_still_seeded(self):
        # 0 sits exactly on the `>= 0` boundary of the guard: it is a real
        # (non-sentinel) value, so a larger estimate must go through.
        assert self._seed(0, 1_234) == 1_234

    def test_equal_estimate_leaves_value_unchanged(self):
        # The comparison is strict (`>`), so an equal estimate is a no-op.
        assert self._seed(10_000, 10_000) == 10_000

    def test_any_negative_value_is_treated_as_sentinel(self):
        # The guard rejects every negative value, not just the literal -1.
        assert self._seed(-5, 250_000) == -5