mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix(agent): clamp compression threshold below context_length to ensure compression can trigger (#14690)
When context_length equals MINIMUM_CONTEXT_LENGTH (64000), the max() floor in threshold calculation pushed threshold_tokens to 100% of the context window, making auto-compression impossible since the API would error before tokens ever reached the threshold. Add a min() clamp after the max() floor so that threshold never exceeds 95% of context_length, giving 5% headroom for compression to trigger. Applied to both __init__ and update_model.
This commit is contained in:
parent
00c3d848d8
commit
89389f1894
2 changed files with 65 additions and 0 deletions
|
|
@@ -318,6 +318,12 @@ class ContextCompressor(ContextEngine):
|
||||||
int(context_length * self.threshold_percent),
|
int(context_length * self.threshold_percent),
|
||||||
MINIMUM_CONTEXT_LENGTH,
|
MINIMUM_CONTEXT_LENGTH,
|
||||||
)
|
)
|
||||||
|
# Clamp: threshold must stay below context_length so compression can
|
||||||
|
# actually trigger before the API rejects the request. The 95% cap
|
||||||
|
# gives 5% headroom — enough to catch the threshold in time.
|
||||||
|
self.threshold_tokens = min(
|
||||||
|
self.threshold_tokens, int(context_length * 0.95)
|
||||||
|
)
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|
@@ -358,6 +364,12 @@ class ContextCompressor(ContextEngine):
|
||||||
int(self.context_length * threshold_percent),
|
int(self.context_length * threshold_percent),
|
||||||
MINIMUM_CONTEXT_LENGTH,
|
MINIMUM_CONTEXT_LENGTH,
|
||||||
)
|
)
|
||||||
|
# Clamp: threshold must stay below context_length so compression can
|
||||||
|
# actually trigger before the API rejects the request. The 95% cap
|
||||||
|
# gives 5% headroom — enough to catch the threshold in time.
|
||||||
|
self.threshold_tokens = min(
|
||||||
|
self.threshold_tokens, int(self.context_length * 0.95)
|
||||||
|
)
|
||||||
self.compression_count = 0
|
self.compression_count = 0
|
||||||
|
|
||||||
# Derive token budgets: ratio is relative to the threshold, not total context
|
# Derive token budgets: ratio is relative to the threshold, not total context
|
||||||
|
|
|
||||||
|
|
@@ -4,6 +4,7 @@ import pytest
|
||||||
from unittest.mock import patch, MagicMock
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX
|
from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX
|
||||||
|
from agent.model_metadata import MINIMUM_CONTEXT_LENGTH
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
|
|
@@ -969,3 +970,55 @@ class TestTruncateToolCallArgsJson:
|
||||||
parsed = _json.loads(shrunk)
|
parsed = _json.loads(shrunk)
|
||||||
assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
|
assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
|
||||||
assert parsed["content"].endswith("...[truncated]")
|
assert parsed["content"].endswith("...[truncated]")
|
||||||
|
|
||||||
|
class TestThresholdClampAtMinimumContext:
|
||||||
|
"""Regression test for #14690: when context_length == MINIMUM_CONTEXT_LENGTH,
|
||||||
|
the max() floor pushed threshold_tokens to 100% of the context window,
|
||||||
|
making auto-compression impossible (API errors before threshold is reached).
|
||||||
|
The fix clamps threshold_tokens to at most 95% of context_length."""
|
||||||
|
|
||||||
|
def test_init_threshold_below_context_length(self):
|
||||||
|
"""__init__ must produce a threshold strictly below context_length."""
|
||||||
|
with patch(
|
||||||
|
"agent.context_compressor.get_model_context_length",
|
||||||
|
return_value=MINIMUM_CONTEXT_LENGTH,
|
||||||
|
):
|
||||||
|
c = ContextCompressor(model="test", quiet_mode=True)
|
||||||
|
assert c.threshold_tokens < c.context_length
|
||||||
|
assert c.threshold_tokens == int(MINIMUM_CONTEXT_LENGTH * 0.95)
|
||||||
|
|
||||||
|
def test_should_compress_at_threshold(self):
|
||||||
|
"""should_compress returns True at the threshold and False just below."""
|
||||||
|
with patch(
|
||||||
|
"agent.context_compressor.get_model_context_length",
|
||||||
|
return_value=MINIMUM_CONTEXT_LENGTH,
|
||||||
|
):
|
||||||
|
c = ContextCompressor(model="test", quiet_mode=True)
|
||||||
|
assert c.should_compress(c.threshold_tokens - 1) is False
|
||||||
|
assert c.should_compress(c.threshold_tokens) is True
|
||||||
|
|
||||||
|
def test_update_model_threshold_below_context_length(self):
|
||||||
|
"""update_model() must also clamp the threshold."""
|
||||||
|
with patch(
|
||||||
|
"agent.context_compressor.get_model_context_length",
|
||||||
|
return_value=200_000,
|
||||||
|
):
|
||||||
|
c = ContextCompressor(model="test", quiet_mode=True)
|
||||||
|
c.update_model(
|
||||||
|
model="small/model",
|
||||||
|
context_length=MINIMUM_CONTEXT_LENGTH,
|
||||||
|
)
|
||||||
|
assert c.threshold_tokens < MINIMUM_CONTEXT_LENGTH
|
||||||
|
assert c.threshold_tokens == int(MINIMUM_CONTEXT_LENGTH * 0.95)
|
||||||
|
assert c.should_compress(c.threshold_tokens - 1) is False
|
||||||
|
assert c.should_compress(c.threshold_tokens) is True
|
||||||
|
|
||||||
|
def test_large_context_unaffected_by_clamp(self):
|
||||||
|
"""For large models the 95% cap does not change the threshold."""
|
||||||
|
with patch(
|
||||||
|
"agent.context_compressor.get_model_context_length",
|
||||||
|
return_value=200_000,
|
||||||
|
):
|
||||||
|
c = ContextCompressor(model="test", quiet_mode=True)
|
||||||
|
# 50% of 200K = 100K, 95% of 200K = 190K => min(100K, 190K) = 100K
|
||||||
|
assert c.threshold_tokens == 100_000
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue