From c1c62b6eefbd5d532f5bd6b37c80e43bedc31f8e Mon Sep 17 00:00:00 2001 From: devilardis <53129661@qq.com> Date: Fri, 24 Apr 2026 03:06:09 +0800 Subject: [PATCH] fix(compression): three bugs causing auto-compression to never trigger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. MINIMUM_CONTEXT_LENGTH floor makes threshold=100% when context_length==64000 - When context_length equals MINIMUM_CONTEXT_LENGTH (64000), the floor value in the threshold_tokens calculation dominates, making the threshold equal to 100% of the context window. The API errors out before prompt_tokens can reach that value, so compression never fires. - Fix: fall back to the percentage-based value when floor >= context_length. - Closes #14690 2. Anti-thrashing protection permanently disables compression with no recovery - After 2 consecutive ineffective compressions (<10% savings each), should_compress() returns False forever. No timeout, decay, or auto-recovery mechanism exists — only /new resets the counter. - Fix: add time-based auto-recovery (300s). If enough time has passed since the last compression attempt, reset the counter. - Closes #14694 3. Post-compression token estimate excludes tools schema - After compression, last_prompt_tokens is set using estimate_messages_tokens_rough(), which omits tools schema tokens (20-30K with 50+ tools). This causes the next compression cycle to trigger much later than the configured threshold. - Fix: use estimate_request_tokens_rough(), which includes the tools schema, consistent with the preflight compression check pattern.
- Closes #14695 --- agent/context_compressor.py | 42 ++++++++++++++++++++++++++++++------- run_agent.py | 11 +++++++--- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index f8036851f..d95b85f35 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -296,6 +296,7 @@ class ContextCompressor(ContextEngine): self._previous_summary = None self._last_compression_savings_pct = 100.0 self._ineffective_compression_count = 0 + self._last_compression_time = 0.0 def update_model( self, @@ -317,6 +318,8 @@ class ContextCompressor(ContextEngine): int(context_length * self.threshold_percent), MINIMUM_CONTEXT_LENGTH, ) + if self.threshold_tokens >= context_length: + self.threshold_tokens = int(context_length * self.threshold_percent) def __init__( self, @@ -353,10 +356,17 @@ class ContextCompressor(ContextEngine): # the percentage would suggest a lower value. This prevents premature # compression on large-context models at 50% while keeping the % sane # for models right at the minimum. + # However, when context_length <= MINIMUM_CONTEXT_LENGTH the floor + # would make threshold >= 100% of context, which is unreachable — the + # API errors out before prompt_tokens can reach that value. In that + # case fall back to the percentage-based value so compression can + # actually trigger. 
self.threshold_tokens = max( int(self.context_length * threshold_percent), MINIMUM_CONTEXT_LENGTH, ) + if self.threshold_tokens >= self.context_length: + self.threshold_tokens = int(self.context_length * threshold_percent) self.compression_count = 0 # Derive token budgets: ratio is relative to the threshold, not total context @@ -388,6 +398,8 @@ class ContextCompressor(ContextEngine): # Anti-thrashing: track whether last compression was effective self._last_compression_savings_pct: float = 100.0 self._ineffective_compression_count: int = 0 + self._last_compression_time: float = 0.0 + self._ANTI_THRASH_RECOVERY_SECONDS: float = 300.0 self._summary_failure_cooldown_until: float = 0.0 def update_from_response(self, usage: Dict[str, Any]): @@ -406,15 +418,28 @@ class ContextCompressor(ContextEngine): if tokens < self.threshold_tokens: return False # Anti-thrashing: back off if recent compressions were ineffective + # Auto-recovery: if enough time has passed since the last compression + # attempt, reset the counter. Without this, a session that had two + # ineffective compressions early on will never auto-compress again, + # even as the context grows far beyond the threshold. if self._ineffective_compression_count >= 2: - if not self.quiet_mode: - logger.warning( - "Compression skipped — last %d compressions saved <10%% each. " - "Consider /new to start a fresh session, or /compress " - "for focused compression.", - self._ineffective_compression_count, - ) - return False + _elapsed = time.monotonic() - self._last_compression_time + if _elapsed > self._ANTI_THRASH_RECOVERY_SECONDS: + self._ineffective_compression_count = 0 + if not self.quiet_mode: + logger.info( + "Anti-thrashing reset: %.0fs since last compression attempt", + _elapsed, + ) + else: + if not self.quiet_mode: + logger.warning( + "Compression skipped — last %d compressions saved <10%% each. 
" + "Consider /new to start a fresh session, or /compress " + "for focused compression.", + self._ineffective_compression_count, + ) + return False return True # ------------------------------------------------------------------ @@ -1258,6 +1283,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Anti-thrashing: track compression effectiveness savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0 self._last_compression_savings_pct = savings_pct + self._last_compression_time = time.monotonic() if savings_pct < 10: self._ineffective_compression_count += 1 else: diff --git a/run_agent.py b/run_agent.py index 855b67a84..3cc8e348b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7595,9 +7595,14 @@ class AIAgent: # Update token estimate after compaction so pressure calculations # use the post-compression count, not the stale pre-compression one. - _compressed_est = ( - estimate_tokens_rough(new_system_prompt) - + estimate_messages_tokens_rough(compressed) + # Use estimate_request_tokens_rough (not estimate_messages_tokens_rough) + # to include tools schema tokens — with 50+ tools enabled, schemas alone + # can add 20-30K tokens, and omitting them causes the next compression + # cycle to trigger much later than the configured threshold. + _compressed_est = estimate_request_tokens_rough( + compressed, + system_prompt=new_system_prompt or "", + tools=self.tools or None, ) self.context_compressor.last_prompt_tokens = _compressed_est self.context_compressor.last_completion_tokens = 0