diff --git a/agent/context_compressor.py b/agent/context_compressor.py index ef40cbfaf..228b88198 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -297,6 +297,7 @@ class ContextCompressor(ContextEngine): self._last_summary_error = None self._last_compression_savings_pct = 100.0 self._ineffective_compression_count = 0 + self._last_compression_time = 0.0 def update_model( self, @@ -318,6 +319,8 @@ class ContextCompressor(ContextEngine): int(context_length * self.threshold_percent), MINIMUM_CONTEXT_LENGTH, ) + if self.threshold_tokens >= context_length: + self.threshold_tokens = int(context_length * self.threshold_percent) def __init__( self, @@ -354,10 +357,17 @@ class ContextCompressor(ContextEngine): # the percentage would suggest a lower value. This prevents premature # compression on large-context models at 50% while keeping the % sane # for models right at the minimum. + # However, when context_length <= MINIMUM_CONTEXT_LENGTH the floor + # would make threshold >= 100% of context, which is unreachable — the + # API errors out before prompt_tokens can reach that value. In that + # case fall back to the percentage-based value so compression can + # actually trigger. self.threshold_tokens = max( int(self.context_length * threshold_percent), MINIMUM_CONTEXT_LENGTH, ) + if self.threshold_tokens >= self.context_length: + self.threshold_tokens = int(self.context_length * threshold_percent) self.compression_count = 0 # Derive token budgets: ratio is relative to the threshold, not total context @@ -389,6 +399,8 @@ class ContextCompressor(ContextEngine): # Anti-thrashing: track whether last compression was effective self._last_compression_savings_pct: float = 100.0 self._ineffective_compression_count: int = 0 + self._last_compression_time: float = 0.0 + self._ANTI_THRASH_RECOVERY_SECONDS: float = 300.0 self._summary_failure_cooldown_until: float = 0.0 self._last_summary_error: Optional[str] = None @@ -408,15 +420,28 @@ class ContextCompressor(ContextEngine): if tokens < self.threshold_tokens: return False # Anti-thrashing: back off if recent compressions were ineffective + # Auto-recovery: if enough time has passed since the last compression + # attempt, reset the counter. Without this, a session that had two + # ineffective compressions early on will never auto-compress again, + # even as the context grows far beyond the threshold. if self._ineffective_compression_count >= 2: - if not self.quiet_mode: - logger.warning( - "Compression skipped — last %d compressions saved <10%% each. " - "Consider /new to start a fresh session, or /compress " - "for focused compression.", - self._ineffective_compression_count, - ) - return False + _elapsed = time.monotonic() - self._last_compression_time + if _elapsed > self._ANTI_THRASH_RECOVERY_SECONDS: + self._ineffective_compression_count = 0 + if not self.quiet_mode: + logger.info( + "Anti-thrashing reset: %.0fs since last compression attempt", + _elapsed, + ) + else: + if not self.quiet_mode: + logger.warning( + "Compression skipped — last %d compressions saved <10%% each. " + "Consider /new to start a fresh session, or /compress " + "for focused compression.", + self._ineffective_compression_count, + ) + return False return True # ------------------------------------------------------------------ @@ -1281,6 +1306,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Anti-thrashing: track compression effectiveness savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0 self._last_compression_savings_pct = savings_pct + self._last_compression_time = time.monotonic() if savings_pct < 10: self._ineffective_compression_count += 1 else: diff --git a/run_agent.py b/run_agent.py index f7a929118..5419d239b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8248,9 +8248,14 @@ class AIAgent: # Update token estimate after compaction so pressure calculations # use the post-compression count, not the stale pre-compression one. - _compressed_est = ( - estimate_tokens_rough(new_system_prompt) - + estimate_messages_tokens_rough(compressed) + # Use estimate_request_tokens_rough (not estimate_messages_tokens_rough) + # to include tools schema tokens — with 50+ tools enabled, schemas alone + # can add 20-30K tokens, and omitting them causes the next compression + # cycle to trigger much later than the configured threshold. + _compressed_est = estimate_request_tokens_rough( + compressed, + system_prompt=new_system_prompt or "", + tools=self.tools or None, ) self.context_compressor.last_prompt_tokens = _compressed_est self.context_compressor.last_completion_tokens = 0