diff --git a/agent/context_compressor.py b/agent/context_compressor.py index bba049230c..22b3c4af3d 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -288,6 +288,23 @@ class ContextCompressor(ContextEngine): def name(self) -> str: return "compressor" + def _recompute_thresholds_and_budgets(self) -> None: + """Rebuild threshold and token budgets after a context/model change.""" + self.threshold_tokens = max( + int(self.context_length * self.threshold_percent), + MINIMUM_CONTEXT_LENGTH, + ) + # Threshold must stay below context_length so compression can trigger + # before the provider rejects the request (95% cap = 5% headroom). + self.threshold_tokens = min( + self.threshold_tokens, int(self.context_length * 0.95) + ) + target_tokens = int(self.threshold_tokens * self.summary_target_ratio) + self.tail_token_budget = target_tokens + self.max_summary_tokens = min( + int(self.context_length * 0.05), _SUMMARY_TOKENS_CEILING, + ) + def on_session_reset(self) -> None: """Reset all per-session state for /new or /reset.""" super().on_session_reset() @@ -314,16 +331,7 @@ class ContextCompressor(ContextEngine): self.provider = provider self.api_mode = api_mode self.context_length = context_length - self.threshold_tokens = max( - int(context_length * self.threshold_percent), - MINIMUM_CONTEXT_LENGTH, - ) - # Clamp: threshold must stay below context_length so compression can - # actually trigger before the API rejects the request. The 95% cap - # gives 5% headroom — enough to catch the threshold in time. - self.threshold_tokens = min( - self.threshold_tokens, int(context_length * 0.95) - ) + self._recompute_thresholds_and_budgets() def __init__( self, @@ -360,25 +368,9 @@ class ContextCompressor(ContextEngine): # the percentage would suggest a lower value. This prevents premature # compression on large-context models at 50% while keeping the % sane # for models right at the minimum. 
- self.threshold_tokens = max( - int(self.context_length * threshold_percent), - MINIMUM_CONTEXT_LENGTH, - ) - # Clamp: threshold must stay below context_length so compression can - # actually trigger before the API rejects the request. The 95% cap - # gives 5% headroom — enough to catch the threshold in time. - self.threshold_tokens = min( - self.threshold_tokens, int(self.context_length * 0.95) - ) + self._recompute_thresholds_and_budgets() self.compression_count = 0 - # Derive token budgets: ratio is relative to the threshold, not total context - target_tokens = int(self.threshold_tokens * self.summary_target_ratio) - self.tail_token_budget = target_tokens - self.max_summary_tokens = min( - int(self.context_length * 0.05), _SUMMARY_TOKENS_CEILING, - ) - if not quiet_mode: logger.info( "Context compressor initialized: model=%s context_length=%d " diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index fc544eab39..a9fa8e4239 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -998,7 +998,7 @@ class TestThresholdClampAtMinimumContext: assert c.should_compress(c.threshold_tokens) is True def test_update_model_threshold_below_context_length(self): - """update_model() must also clamp the threshold.""" + """update_model() must also rebuild all derived budgets.""" with patch( "agent.context_compressor.get_model_context_length", return_value=200_000, @@ -1010,6 +1010,8 @@ class TestThresholdClampAtMinimumContext: ) assert c.threshold_tokens < MINIMUM_CONTEXT_LENGTH assert c.threshold_tokens == int(MINIMUM_CONTEXT_LENGTH * 0.95) + assert c.tail_token_budget == int(c.threshold_tokens * c.summary_target_ratio) + assert c.max_summary_tokens == int(MINIMUM_CONTEXT_LENGTH * 0.05) assert c.should_compress(c.threshold_tokens - 1) is False assert c.should_compress(c.threshold_tokens) is True @@ -1022,3 +1024,5 @@ class TestThresholdClampAtMinimumContext: c = ContextCompressor(model="test", 
quiet_mode=True) # 50% of 200K = 100K, 95% of 200K = 190K => min(100K, 190K) = 100K assert c.threshold_tokens == 100_000 + assert c.tail_token_budget == int(100_000 * c.summary_target_ratio) + assert c.max_summary_tokens == 10_000