mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(compression): exclude completion tokens from compression trigger (#12026)
Cherry-picked from PR #12481 by @Sanjays2402. Reasoning models (GLM-5.1, QwQ, DeepSeek R1) inflate completion_tokens with internal thinking tokens. The compression trigger summed prompt_tokens + completion_tokens, causing premature compression at ~42% of actual context usage instead of the configured 50% threshold. The trigger now uses only prompt_tokens, since completion tokens do not consume context-window space in the next API call. Changes: added 3 new regression tests; added an AUTHOR_MAP entry for @Sanjays2402. Closes #12026.
This commit is contained in:
parent
42c30985c7
commit
570f8bab8f
3 changed files with 68 additions and 4 deletions
10
run_agent.py
10
run_agent.py
|
|
@ -11736,10 +11736,12 @@ class AIAgent:
|
|||
# should_compress(0) never fires. (#2153)
|
||||
_compressor = self.context_compressor
|
||||
if _compressor.last_prompt_tokens > 0:
|
||||
_real_tokens = (
|
||||
_compressor.last_prompt_tokens
|
||||
+ _compressor.last_completion_tokens
|
||||
)
|
||||
# Only use prompt_tokens — completion/reasoning
|
||||
# tokens don't consume context window space.
|
||||
# Thinking models (GLM-5.1, QwQ, DeepSeek R1)
|
||||
# inflate completion_tokens with reasoning,
|
||||
# causing premature compression. (#12026)
|
||||
_real_tokens = _compressor.last_prompt_tokens
|
||||
else:
|
||||
_real_tokens = estimate_messages_tokens_rough(messages)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue