mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
After a compaction, the post-compression path parks last_prompt_tokens=-1 and sets awaiting_real_usage_after_compression=True, but last_real_prompt_tokens still holds the stale pre-compression value (above threshold). should_defer_preflight_to_real_usage() hit the 'last_real_prompt_tokens >= threshold => False' short-circuit and let preflight fire a SECOND compaction before the provider reported real post-compaction usage. Add an early-return on the awaiting flag so deferral holds for exactly one turn; update_from_response() clears it. The flag-setting half (#36718) already landed on main via the in-place compaction path (conversation_compression.py); this adds the missing should_defer guard that consumes it. Credit: - @ashishpatel26 (#38133) — diagnosis + the should_defer early-return design - @Tranquil-Flow (#36769) — same #36718 fix, identical guard placement Closes #36718.
This commit is contained in:
parent
b4cb33cd42
commit
b2c84a1626
2 changed files with 34 additions and 0 deletions
|
|
@@ -878,6 +878,18 @@ class ContextCompressor(ContextEngine):
|
|||
"""
|
||||
if rough_tokens < self.threshold_tokens:
|
||||
return False
|
||||
# Immediately after a compaction the post-compression path sets
|
||||
# ``awaiting_real_usage_after_compression`` and parks
|
||||
# ``last_prompt_tokens = -1``, but ``last_real_prompt_tokens`` still
|
||||
# holds the STALE pre-compression value (above threshold — that's why
|
||||
# compaction fired). Without this guard that stale value defeats the
|
||||
# ``last_real_prompt_tokens >= threshold_tokens`` check below, so
|
||||
# preflight fires a SECOND compaction before the provider has reported
|
||||
# real token usage for the now-shorter conversation. Defer for exactly
|
||||
# one turn; update_from_response() clears the flag when real usage
|
||||
# arrives. (#36718)
|
||||
if self.awaiting_real_usage_after_compression:
|
||||
return True
|
||||
if self.last_real_prompt_tokens <= 0:
|
||||
return False
|
||||
if self.last_real_prompt_tokens >= self.threshold_tokens:
|
||||
|
|
|
|||
|
|
@@ -86,6 +86,28 @@ class TestPreflightDeferral:
|
|||
|
||||
assert compressor.should_defer_preflight_to_real_usage(93_000) is False
|
||||
|
||||
def test_defers_immediately_after_compaction_with_stale_real_prompt(self, compressor):
|
||||
"""#36718: right after a compaction, last_real_prompt_tokens still holds
|
||||
the stale pre-compression value (above threshold). The awaiting flag
|
||||
must force deferral so preflight doesn't fire a SECOND compaction before
|
||||
real post-compaction usage arrives."""
|
||||
compressor.threshold_tokens = 85_000
|
||||
# Stale pre-compression value — would hit the `>= threshold => False`
|
||||
# short-circuit and defeat deferral without the flag guard.
|
||||
compressor.last_real_prompt_tokens = 120_000
|
||||
compressor.awaiting_real_usage_after_compression = True
|
||||
assert compressor.should_defer_preflight_to_real_usage(95_000) is True
|
||||
|
||||
def test_resumes_normal_deferral_after_flag_cleared(self, compressor):
|
||||
"""Once update_from_response() clears the flag, the normal baseline/
|
||||
growth deferral logic governs again (no permanent deferral)."""
|
||||
compressor.threshold_tokens = 85_000
|
||||
compressor.last_real_prompt_tokens = 120_000
|
||||
compressor.awaiting_real_usage_after_compression = False
|
||||
# Stale-high real prompt with the flag cleared => the >= threshold
|
||||
# short-circuit applies => no deferral.
|
||||
assert compressor.should_defer_preflight_to_real_usage(95_000) is False
|
||||
|
||||
|
||||
|
||||
class TestCompress:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue