mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(compression): don't overwrite the -1 post-compression sentinel in preflight seed (#36718)
compress_context() sets last_prompt_tokens=-1 right after compression to mark "no real API usage yet". The preflight display-seed used `_preflight_tokens > (last_prompt_tokens or 0)`, and `(-1 or 0)` evaluates to -1 because -1 is truthy, so any positive rough estimate clobbered the sentinel with a schema-inflated count — re-triggering compression on the next turn. Treat any negative value as "no real data yet" and skip the seed. Salvaged from #40246 as the minimal root-cause fix. The original PR (#40246) also added an `_awaiting_suppression_count` bounded-window state machine to should_compress() across 3 files; that is left out here to keep the blast radius small — the sentinel guard alone fixes the re-fire. The suppression window can be added separately if the usage=None-stub edge case warrants it. Co-authored-by: davidgut1982 <davidgut1982@users.noreply.github.com>
This commit is contained in:
parent
3763355f08
commit
3c8f1dee8d
2 changed files with 44 additions and 1 deletion
|
|
@@ -641,7 +641,14 @@ def run_conversation(
|
|||
# Skipped when deferring — a deferred estimate is known to over-count
|
||||
# vs the last real provider prompt, so trusting it for the display
|
||||
# would re-introduce the very desync we're avoiding.
|
||||
if _preflight_tokens > (_compressor.last_prompt_tokens or 0):
|
||||
_last = _compressor.last_prompt_tokens
|
||||
# Do NOT overwrite the -1 sentinel. compress_context() sets
|
||||
# last_prompt_tokens=-1 right after compression to mark "no real API
|
||||
# usage yet". `(x or 0)` evaluates to -1 (truthy) for the sentinel,
|
||||
# so the old comparison was always True and clobbered the sentinel
|
||||
# with a schema-inflated rough estimate — re-triggering compression
|
||||
# on the next turn (#36718). Treat any negative value as "no data".
|
||||
if _last >= 0 and _preflight_tokens > _last:
|
||||
_compressor.last_prompt_tokens = _preflight_tokens
|
||||
|
||||
if _preflight_deferred:
|
||||
|
|
|
|||
|
|
@@ -2147,3 +2147,39 @@ class TestTruncateToolCallArgsJson:
|
|||
parsed = _json.loads(shrunk)
|
||||
assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
|
||||
assert parsed["content"].endswith("...[truncated]")
|
||||
|
||||
|
||||
class TestPreflightSentinelGuard:
|
||||
"""Regression for #36718: the preflight token-display seed in
|
||||
run_conversation must NOT overwrite the -1 sentinel that
|
||||
compress_context() sets immediately after compression.
|
||||
|
||||
The old guard `_preflight_tokens > (last_prompt_tokens or 0)` evaluated
|
||||
`(-1 or 0)` -> -1 (truthy), so any positive preflight estimate was > -1
|
||||
and clobbered the sentinel with a schema-inflated rough count, re-firing
|
||||
compression on the next turn. The fix treats any negative value as
|
||||
"no real usage yet" and skips the seed.
|
||||
"""
|
||||
|
||||
def _seed(self, last_prompt_tokens, preflight_tokens):
|
||||
# Mirror the exact guard in agent/conversation_loop.py run_conversation.
|
||||
_last = last_prompt_tokens
|
||||
if _last >= 0 and preflight_tokens > _last:
|
||||
return preflight_tokens # would overwrite
|
||||
return last_prompt_tokens # preserved
|
||||
|
||||
def test_sentinel_preserved_after_compression(self, compressor):
|
||||
compressor.last_prompt_tokens = -1
|
||||
# A large schema-inflated preflight estimate must NOT overwrite -1.
|
||||
result = self._seed(compressor.last_prompt_tokens, 250_000)
|
||||
assert result == -1
|
||||
|
||||
def test_real_value_still_revises_upward(self, compressor):
|
||||
compressor.last_prompt_tokens = 10_000
|
||||
result = self._seed(compressor.last_prompt_tokens, 50_000)
|
||||
assert result == 50_000
|
||||
|
||||
def test_real_value_not_revised_downward(self, compressor):
|
||||
compressor.last_prompt_tokens = 50_000
|
||||
result = self._seed(compressor.last_prompt_tokens, 10_000)
|
||||
assert result == 50_000
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue