From 89389f1894eaadf441322a69c2b7df555bf5bb02 Mon Sep 17 00:00:00 2001 From: Tranquil-Flow Date: Sat, 25 Apr 2026 08:55:23 +1000 Subject: [PATCH] fix(agent): clamp compression threshold below context_length to ensure compression can trigger (#14690) When context_length equals MINIMUM_CONTEXT_LENGTH (64000), the max() floor in threshold calculation pushed threshold_tokens to 100% of the context window, making auto-compression impossible since the API would error before tokens ever reached the threshold. Add a min() clamp after the max() floor so that threshold never exceeds 95% of context_length, giving 5% headroom for compression to trigger. Applied to both __init__ and update_model. --- agent/context_compressor.py | 12 ++++++ tests/agent/test_context_compressor.py | 53 ++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index ef40cbfafb..bba049230c 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -318,6 +318,12 @@ class ContextCompressor(ContextEngine): int(context_length * self.threshold_percent), MINIMUM_CONTEXT_LENGTH, ) + # Clamp: threshold must stay below context_length so compression can + # actually trigger before the API rejects the request. The 95% cap + # gives 5% headroom — enough to catch the threshold in time. + self.threshold_tokens = min( + self.threshold_tokens, int(context_length * 0.95) + ) def __init__( self, @@ -358,6 +364,12 @@ class ContextCompressor(ContextEngine): int(self.context_length * threshold_percent), MINIMUM_CONTEXT_LENGTH, ) + # Clamp: threshold must stay below context_length so compression can + # actually trigger before the API rejects the request. The 95% cap + # gives 5% headroom — enough to catch the threshold in time. + self.threshold_tokens = min( + self.threshold_tokens, int(self.context_length * 0.95) + ) self.compression_count = 0 # Derive token budgets: ratio is relative to the threshold, not total context diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 8072a58d98..fc544eab39 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -4,6 +4,7 @@ import pytest from unittest.mock import patch, MagicMock from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX +from agent.model_metadata import MINIMUM_CONTEXT_LENGTH @pytest.fixture() @@ -969,3 +970,55 @@ class TestTruncateToolCallArgsJson: parsed = _json.loads(shrunk) assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md" assert parsed["content"].endswith("...[truncated]") + +class TestThresholdClampAtMinimumContext: + """Regression test for #14690: when context_length == MINIMUM_CONTEXT_LENGTH, + the max() floor pushed threshold_tokens to 100% of the context window, + making auto-compression impossible (API errors before threshold is reached). + The fix clamps threshold_tokens to at most 95% of context_length.""" + + def test_init_threshold_below_context_length(self): + """__init__ must produce a threshold strictly below context_length.""" + with patch( + "agent.context_compressor.get_model_context_length", + return_value=MINIMUM_CONTEXT_LENGTH, + ): + c = ContextCompressor(model="test", quiet_mode=True) + assert c.threshold_tokens < c.context_length + assert c.threshold_tokens == int(MINIMUM_CONTEXT_LENGTH * 0.95) + + def test_should_compress_at_threshold(self): + """should_compress returns True at the threshold and False just below.""" + with patch( + "agent.context_compressor.get_model_context_length", + return_value=MINIMUM_CONTEXT_LENGTH, + ): + c = ContextCompressor(model="test", quiet_mode=True) + assert c.should_compress(c.threshold_tokens - 1) is False + assert c.should_compress(c.threshold_tokens) is True + + def test_update_model_threshold_below_context_length(self): + """update_model() must also clamp the threshold.""" + with patch( + "agent.context_compressor.get_model_context_length", + return_value=200_000, + ): + c = ContextCompressor(model="test", quiet_mode=True) + c.update_model( + model="small/model", + context_length=MINIMUM_CONTEXT_LENGTH, + ) + assert c.threshold_tokens < MINIMUM_CONTEXT_LENGTH + assert c.threshold_tokens == int(MINIMUM_CONTEXT_LENGTH * 0.95) + assert c.should_compress(c.threshold_tokens - 1) is False + assert c.should_compress(c.threshold_tokens) is True + + def test_large_context_unaffected_by_clamp(self): + """For large models the 95% cap does not change the threshold.""" + with patch( + "agent.context_compressor.get_model_context_length", + return_value=200_000, + ): + c = ContextCompressor(model="test", quiet_mode=True) + # 50% of 200K = 100K, 95% of 200K = 190K => min(100K, 190K) = 100K + assert c.threshold_tokens == 100_000