mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix(agent): count tokens, not just rows, as preflight compression progress
Rebased onto god-file Phase 1 refactor — preflight compression has moved from agent/conversation_loop.py to agent/turn_context.py (no semantic change in the refactor itself; the bug below was carried over verbatim).

The preflight compression loop in ``turn_context.py`` uses ``len(messages) >= _orig_len`` to decide whether a compression pass has made progress. That conflates two different conditions: a true no-op (transcript materially unchanged) and effective token compression that summarises message contents but keeps the same number of rows. The second case is misread as "Cannot compress further" — the session then surfaces ``Context length exceeded`` and auto-resets even when the post-compression estimate is far below the model context window.

Observed example from #39548: a Telegram session on GPT-5.5 with a 1M context dropped from ~288k → ~183k tokens (a 36% reduction) while preserving 220 messages. The loop treats that as exhaustion and the gateway auto-resets the session.

Fix
---

Add ``_compression_made_progress(orig_len, new_len, orig_tokens, new_tokens)`` and call it after the post-pass ``estimate_request_tokens_rough`` (which is moved up to run *before* the progress check instead of after it). Either a row-count reduction OR a token-count reduction now counts as progress; only when neither moves do we break out as "stuck".

Fixes #39548
This commit is contained in:
parent
33efff0d8c
commit
b08ee8ad04
2 changed files with 97 additions and 7 deletions
|
|
@ -34,6 +34,23 @@ from agent.model_metadata import estimate_request_tokens_rough
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _compression_made_progress(
|
||||
orig_len: int, new_len: int, orig_tokens: int, new_tokens: int
|
||||
) -> bool:
|
||||
"""Return ``True`` if a compression pass materially reduced the request.
|
||||
|
||||
Compression can succeed by summarising message contents — reducing the
|
||||
estimated request token count — without reducing the message row
|
||||
count. Treating row count as the sole progress signal false-positives
|
||||
on size-only wins and surfaces a misleading "Cannot compress further"
|
||||
failure even when post-compression tokens are well below the model
|
||||
context window. See issue #39548 for an observed case: 220 → 220
|
||||
messages, ~288k → ~183k tokens on a 1M-context model still triggered
|
||||
auto-reset.
|
||||
"""
|
||||
return new_len < orig_len or new_tokens < orig_tokens
|
||||
|
||||
|
||||
@dataclass
|
||||
class TurnContext:
|
||||
"""Values produced by the turn prologue and consumed by the turn loop."""
|
||||
|
|
@ -313,23 +330,30 @@ def build_turn_context(
|
|||
)
|
||||
for _pass in range(3):
|
||||
_orig_len = len(messages)
|
||||
_orig_tokens = _preflight_tokens
|
||||
messages, active_system_prompt = agent._compress_context(
|
||||
messages, system_message, approx_tokens=_preflight_tokens,
|
||||
task_id=effective_task_id,
|
||||
)
|
||||
if len(messages) >= _orig_len:
|
||||
break # Cannot compress further
|
||||
# Re-estimate now so size-only compression (same row count,
|
||||
# lower token count — e.g. summarising tool outputs) is
|
||||
# recognised as progress instead of being misread as
|
||||
# "Cannot compress further". Fixes #39548.
|
||||
_preflight_tokens = estimate_request_tokens_rough(
|
||||
messages,
|
||||
system_prompt=active_system_prompt or "",
|
||||
tools=agent.tools or None,
|
||||
)
|
||||
if not _compression_made_progress(
|
||||
_orig_len, len(messages), _orig_tokens, _preflight_tokens
|
||||
):
|
||||
break # Cannot compress further: neither rows nor tokens moved
|
||||
conversation_history = None
|
||||
agent._empty_content_retries = 0
|
||||
agent._thinking_prefill_retries = 0
|
||||
agent._last_content_with_tools = None
|
||||
agent._last_content_tools_all_housekeeping = False
|
||||
agent._mute_post_response = False
|
||||
_preflight_tokens = estimate_request_tokens_rough(
|
||||
messages,
|
||||
system_prompt=active_system_prompt or "",
|
||||
tools=agent.tools or None,
|
||||
)
|
||||
if not _compressor.should_compress(_preflight_tokens):
|
||||
break
|
||||
|
||||
|
|
|
|||
66
tests/agent/test_compression_progress.py
Normal file
66
tests/agent/test_compression_progress.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
"""Regression: detect compression progress by tokens, not just rows.
|
||||
|
||||
Issue #39548: preflight compression in the turn prologue was checking
|
||||
``len(messages) >= _orig_len`` to decide "Cannot compress further". This
|
||||
false-positives when a pass summarises message contents — reducing the
|
||||
estimated request token count without removing any rows — and surfaces a
|
||||
spurious ``Context length exceeded`` failure followed by an auto-reset of
|
||||
an otherwise healthy session.
|
||||
|
||||
These tests pin the contract of ``_compression_made_progress``: either a
|
||||
row-count reduction OR a token-count reduction counts as progress.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from agent.turn_context import _compression_made_progress
|
||||
|
||||
|
||||
class TestCompressionMadeProgress:
    """Pin the contract of ``_compression_made_progress``.

    A pass is progress when it lowers the row count, the token estimate,
    or both; it is "stuck" only when neither measure decreases.
    """

    def test_rows_reduced_counts_as_progress(self):
        """Fewer message rows with unchanged tokens is progress."""
        made_progress = _compression_made_progress(
            orig_len=10, new_len=5, orig_tokens=1000, new_tokens=1000
        )
        assert made_progress is True

    def test_tokens_reduced_without_row_change_counts_as_progress(self):
        """Issue #39548: 220 → 220 rows, 288k → 183k tokens is progress."""
        made_progress = _compression_made_progress(
            orig_len=220, new_len=220, orig_tokens=288_028, new_tokens=183_180
        )
        assert made_progress is True

    def test_both_reduced_counts_as_progress(self):
        """Typical pass: some rows removed and the token estimate lower."""
        made_progress = _compression_made_progress(
            orig_len=220, new_len=180, orig_tokens=288_028, new_tokens=150_000
        )
        assert made_progress is True

    def test_neither_moved_means_no_progress(self):
        """Same rows and same tokens is the genuine "stuck" case."""
        made_progress = _compression_made_progress(
            orig_len=10, new_len=10, orig_tokens=1000, new_tokens=1000
        )
        assert made_progress is False

    def test_rows_grew_and_tokens_grew_means_no_progress(self):
        """A pass that enlarges both measures is not progress."""
        made_progress = _compression_made_progress(
            orig_len=10, new_len=12, orig_tokens=1000, new_tokens=1200
        )
        assert made_progress is False

    def test_rows_grew_but_tokens_dropped_is_progress(self):
        """More rows but fewer tokens still counts as progress.

        A lower token estimate on its own is enough to keep the loop going,
        even if summary rows increased the row count.
        """
        made_progress = _compression_made_progress(
            orig_len=10, new_len=11, orig_tokens=1000, new_tokens=600
        )
        assert made_progress is True

    def test_tokens_grew_but_rows_dropped_is_progress(self):
        """Fewer rows but more tokens still counts as progress.

        A lower row count on its own is enough, even if the token estimate
        crept up (e.g. a verbose summary).
        """
        made_progress = _compression_made_progress(
            orig_len=10, new_len=5, orig_tokens=1000, new_tokens=1100
        )
        assert made_progress is True
|
||||
Loading…
Add table
Add a link
Reference in a new issue