This commit is contained in:
Evi Nova 2026-04-25 10:17:50 +10:00 committed by GitHub
commit a170ed063b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 76 additions and 15 deletions

View file

@@ -288,6 +288,23 @@ class ContextCompressor(ContextEngine):
def name(self) -> str:
    """Return the stable identifier used to register/select this engine."""
    engine_id = "compressor"
    return engine_id
def _recompute_thresholds_and_budgets(self) -> None:
    """Rebuild every token threshold/budget derived from the current model.

    Must be called whenever ``self.context_length`` changes so that the
    trigger threshold, tail budget, and summary budget all stay consistent
    with the new context window.
    """
    # Floor: never trigger below the minimum supported window size.
    floor = MINIMUM_CONTEXT_LENGTH
    raw_threshold = int(self.context_length * self.threshold_percent)
    # Cap: keep the trigger strictly under the full window (95%) so
    # compression can fire before the provider rejects the request outright.
    cap = int(self.context_length * 0.95)
    self.threshold_tokens = min(max(raw_threshold, floor), cap)
    # Budgets derive from the threshold, not the total context window.
    self.tail_token_budget = int(self.threshold_tokens * self.summary_target_ratio)
    self.max_summary_tokens = min(
        int(self.context_length * 0.05),
        _SUMMARY_TOKENS_CEILING,
    )
def on_session_reset(self) -> None:
"""Reset all per-session state for /new or /reset."""
super().on_session_reset()
@@ -314,10 +331,7 @@ class ContextCompressor(ContextEngine):
self.provider = provider
self.api_mode = api_mode
self.context_length = context_length
self.threshold_tokens = max(
int(context_length * self.threshold_percent),
MINIMUM_CONTEXT_LENGTH,
)
self._recompute_thresholds_and_budgets()
def __init__(
self,
@@ -354,19 +368,9 @@ class ContextCompressor(ContextEngine):
# the percentage would suggest a lower value. This prevents premature
# compression on large-context models at 50% while keeping the % sane
# for models right at the minimum.
self.threshold_tokens = max(
int(self.context_length * threshold_percent),
MINIMUM_CONTEXT_LENGTH,
)
self._recompute_thresholds_and_budgets()
self.compression_count = 0
# Derive token budgets: ratio is relative to the threshold, not total context
target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
self.tail_token_budget = target_tokens
self.max_summary_tokens = min(
int(self.context_length * 0.05), _SUMMARY_TOKENS_CEILING,
)
if not quiet_mode:
logger.info(
"Context compressor initialized: model=%s context_length=%d "

View file

@@ -4,6 +4,7 @@ import pytest
from unittest.mock import patch, MagicMock
from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX
from agent.model_metadata import MINIMUM_CONTEXT_LENGTH
@pytest.fixture()
@@ -969,3 +970,59 @@ class TestTruncateToolCallArgsJson:
parsed = _json.loads(shrunk)
assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
assert parsed["content"].endswith("...[truncated]")
class TestThresholdClampAtMinimumContext:
    """Regression tests for #14690.

    When ``context_length == MINIMUM_CONTEXT_LENGTH`` the ``max()`` floor
    used to push ``threshold_tokens`` to 100% of the context window, which
    made auto-compression impossible: the API rejected requests before the
    threshold was ever reached.  The fix clamps ``threshold_tokens`` to at
    most 95% of ``context_length``.
    """

    # Dotted path of the context-length lookup patched by every test below.
    _CTX_LEN_LOOKUP = "agent.context_compressor.get_model_context_length"

    def test_init_threshold_below_context_length(self):
        """__init__ must produce a threshold strictly below context_length."""
        with patch(self._CTX_LEN_LOOKUP, return_value=MINIMUM_CONTEXT_LENGTH):
            compressor = ContextCompressor(model="test", quiet_mode=True)
            assert compressor.threshold_tokens < compressor.context_length
            assert compressor.threshold_tokens == int(MINIMUM_CONTEXT_LENGTH * 0.95)

    def test_should_compress_at_threshold(self):
        """should_compress flips from False to True exactly at the threshold."""
        with patch(self._CTX_LEN_LOOKUP, return_value=MINIMUM_CONTEXT_LENGTH):
            compressor = ContextCompressor(model="test", quiet_mode=True)
            threshold = compressor.threshold_tokens
            assert compressor.should_compress(threshold - 1) is False
            assert compressor.should_compress(threshold) is True

    def test_update_model_threshold_below_context_length(self):
        """update_model() must rebuild all derived budgets, not just the threshold."""
        with patch(self._CTX_LEN_LOOKUP, return_value=200_000):
            compressor = ContextCompressor(model="test", quiet_mode=True)
            compressor.update_model(
                model="small/model",
                context_length=MINIMUM_CONTEXT_LENGTH,
            )
            threshold = compressor.threshold_tokens
            assert threshold < MINIMUM_CONTEXT_LENGTH
            assert threshold == int(MINIMUM_CONTEXT_LENGTH * 0.95)
            expected_tail = int(threshold * compressor.summary_target_ratio)
            assert compressor.tail_token_budget == expected_tail
            assert compressor.max_summary_tokens == int(MINIMUM_CONTEXT_LENGTH * 0.05)
            assert compressor.should_compress(threshold - 1) is False
            assert compressor.should_compress(threshold) is True

    def test_large_context_unaffected_by_clamp(self):
        """For large models the 95% cap does not change the threshold."""
        with patch(self._CTX_LEN_LOOKUP, return_value=200_000):
            compressor = ContextCompressor(model="test", quiet_mode=True)
            # 50% of 200K = 100K and 95% of 200K = 190K, so min(100K, 190K)
            # leaves the percentage-derived threshold untouched.
            assert compressor.threshold_tokens == 100_000
            expected_tail = int(100_000 * compressor.summary_target_ratio)
            assert compressor.tail_token_budget == expected_tail
            assert compressor.max_summary_tokens == 10_000