diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py
index 72955251dcb..660792feab6 100644
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -641,7 +641,16 @@ def run_conversation(
             # Skipped when deferring — a deferred estimate is known to over-count
             # vs the last real provider prompt, so trusting it for the display
             # would re-introduce the very desync we're avoiding.
-            if _preflight_tokens > (_compressor.last_prompt_tokens or 0):
+            # Do NOT overwrite the -1 sentinel. compress_context() sets
+            # last_prompt_tokens=-1 right after compression to mark "no real API
+            # usage yet". `(x or 0)` evaluates to -1 (truthy) for the sentinel,
+            # so the old comparison was True for any positive estimate and
+            # clobbered the sentinel with a schema-inflated rough estimate —
+            # re-triggering compression on the next turn (#36718). Treat any
+            # negative value as "no data". `or 0` is kept so a None/unset value
+            # still compares as 0 (as before) instead of raising TypeError.
+            _last = _compressor.last_prompt_tokens or 0
+            if _last >= 0 and _preflight_tokens > _last:
                 _compressor.last_prompt_tokens = _preflight_tokens
 
         if _preflight_deferred:
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 5ce753864c9..1b4242e0e01 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -2147,3 +2147,47 @@ class TestTruncateToolCallArgsJson:
         parsed = _json.loads(shrunk)
         assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
         assert parsed["content"].endswith("...[truncated]")
+
+
+class TestPreflightSentinelGuard:
+    """Regression for #36718: the preflight token-display seed in
+    run_conversation must NOT overwrite the -1 sentinel that
+    compress_context() sets immediately after compression.
+
+    The old guard `_preflight_tokens > (last_prompt_tokens or 0)` evaluated
+    `(-1 or 0)` -> -1 (truthy), so any positive preflight estimate was > -1
+    and clobbered the sentinel with a schema-inflated rough count, re-firing
+    compression on the next turn. The fix treats any negative value as
+    "no real usage yet" and skips the seed.
+
+    A None/unset value is still coerced to 0 (as the old guard did), so the
+    seed is applied instead of raising TypeError on `None >= 0`.
+    """
+
+    def _seed(self, last_prompt_tokens, preflight_tokens):
+        # Mirror the exact guard in agent/conversation_loop.py run_conversation.
+        _last = last_prompt_tokens or 0
+        if _last >= 0 and preflight_tokens > _last:
+            return preflight_tokens  # would overwrite
+        return last_prompt_tokens  # preserved
+
+    def test_sentinel_preserved_after_compression(self, compressor):
+        compressor.last_prompt_tokens = -1
+        # A large schema-inflated preflight estimate must NOT overwrite -1.
+        result = self._seed(compressor.last_prompt_tokens, 250_000)
+        assert result == -1
+
+    def test_real_value_still_revises_upward(self, compressor):
+        compressor.last_prompt_tokens = 10_000
+        result = self._seed(compressor.last_prompt_tokens, 50_000)
+        assert result == 50_000
+
+    def test_real_value_not_revised_downward(self, compressor):
+        compressor.last_prompt_tokens = 50_000
+        result = self._seed(compressor.last_prompt_tokens, 10_000)
+        assert result == 50_000
+
+    def test_unset_value_is_seeded_without_error(self, compressor):
+        compressor.last_prompt_tokens = None
+        result = self._seed(compressor.last_prompt_tokens, 10_000)
+        assert result == 10_000