From c1c62b6eefbd5d532f5bd6b37c80e43bedc31f8e Mon Sep 17 00:00:00 2001
From: devilardis <53129661@qq.com>
Date: Fri, 24 Apr 2026 03:06:09 +0800
Subject: [PATCH] fix(compression): three bugs causing auto-compression to
 never trigger
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. MINIMUM_CONTEXT_LENGTH floor makes threshold=100% when context_length==64000
   - When context_length is at or below MINIMUM_CONTEXT_LENGTH (64000), the
     floor in the threshold_tokens calculation dominates, making the
     threshold equal to or greater than 100% of the context window. The
     API errors out before prompt_tokens can reach that value, so
     compression never fires.
   - Fix: fall back to the percentage-based value whenever the floored
     threshold is >= context_length.
   - Closes #14690

2. Anti-thrashing protection permanently disables compression with no recovery
   - After 2 consecutive ineffective compressions (<10% savings each),
     should_compress() returns False forever. No timeout, decay, or
     auto-recovery mechanism exists — only /new resets the counter.
   - Fix: add time-based auto-recovery (300s). If enough time has passed
     since the last compression attempt, reset the counter.
   - Closes #14694

3. Post-compression token estimate excludes tools schema
   - After compression, last_prompt_tokens is set using
     estimate_messages_tokens_rough(), which omits tools schema tokens
     (20-30K with 50+ tools). The stored estimate is therefore too low,
     so the next compression cycle triggers much later than the
     configured threshold.
   - Fix: use estimate_request_tokens_rough() which includes tools schema,
     consistent with the preflight compression check pattern.
   - Closes #14695
---
 agent/context_compressor.py | 42 ++++++++++++++++++++++++++++++-------
 run_agent.py                | 11 +++++++---
 2 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index f8036851f..d95b85f35 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -296,6 +296,7 @@ class ContextCompressor(ContextEngine):
         self._previous_summary = None
         self._last_compression_savings_pct = 100.0
         self._ineffective_compression_count = 0
+        self._last_compression_time = 0.0
 
     def update_model(
         self,
@@ -317,6 +318,8 @@ class ContextCompressor(ContextEngine):
             int(context_length * self.threshold_percent),
             MINIMUM_CONTEXT_LENGTH,
         )
+        if self.threshold_tokens >= context_length:
+            self.threshold_tokens = int(context_length * self.threshold_percent)
 
     def __init__(
         self,
@@ -353,10 +356,17 @@ class ContextCompressor(ContextEngine):
         # the percentage would suggest a lower value.  This prevents premature
         # compression on large-context models at 50% while keeping the % sane
         # for models right at the minimum.
+        # However, when context_length <= MINIMUM_CONTEXT_LENGTH the floor
+        # would make threshold >= 100% of context, which is unreachable — the
+        # API errors out before prompt_tokens can reach that value.  In that
+        # case fall back to the percentage-based value so compression can
+        # actually trigger.
         self.threshold_tokens = max(
             int(self.context_length * threshold_percent),
             MINIMUM_CONTEXT_LENGTH,
         )
+        if self.threshold_tokens >= self.context_length:
+            self.threshold_tokens = int(self.context_length * threshold_percent)
         self.compression_count = 0
 
         # Derive token budgets: ratio is relative to the threshold, not total context
@@ -388,6 +398,8 @@ class ContextCompressor(ContextEngine):
         # Anti-thrashing: track whether last compression was effective
         self._last_compression_savings_pct: float = 100.0
         self._ineffective_compression_count: int = 0
+        self._last_compression_time: float = 0.0
+        self._ANTI_THRASH_RECOVERY_SECONDS: float = 300.0
         self._summary_failure_cooldown_until: float = 0.0
 
     def update_from_response(self, usage: Dict[str, Any]):
@@ -406,15 +418,28 @@ class ContextCompressor(ContextEngine):
         if tokens < self.threshold_tokens:
             return False
         # Anti-thrashing: back off if recent compressions were ineffective
+        # Auto-recovery: if enough time has passed since the last compression
+        # attempt, reset the counter.  Without this, a session that had two
+        # ineffective compressions early on will never auto-compress again,
+        # even as the context grows far beyond the threshold.
         if self._ineffective_compression_count >= 2:
-            if not self.quiet_mode:
-                logger.warning(
-                    "Compression skipped — last %d compressions saved <10%% each. "
-                    "Consider /new to start a fresh session, or /compress <topic> "
-                    "for focused compression.",
-                    self._ineffective_compression_count,
-                )
-            return False
+            _elapsed = time.monotonic() - self._last_compression_time
+            if _elapsed > self._ANTI_THRASH_RECOVERY_SECONDS:
+                self._ineffective_compression_count = 0
+                if not self.quiet_mode:
+                    logger.info(
+                        "Anti-thrashing reset: %.0fs since last compression attempt",
+                        _elapsed,
+                    )
+            else:
+                if not self.quiet_mode:
+                    logger.warning(
+                        "Compression skipped — last %d compressions saved <10%% each. "
+                        "Consider /new to start a fresh session, or /compress <topic> "
+                        "for focused compression.",
+                        self._ineffective_compression_count,
+                    )
+                return False
         return True
 
     # ------------------------------------------------------------------
@@ -1258,6 +1283,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
         # Anti-thrashing: track compression effectiveness
         savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
         self._last_compression_savings_pct = savings_pct
+        self._last_compression_time = time.monotonic()
         if savings_pct < 10:
             self._ineffective_compression_count += 1
         else:
diff --git a/run_agent.py b/run_agent.py
index 855b67a84..3cc8e348b 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -7595,9 +7595,14 @@ class AIAgent:
 
         # Update token estimate after compaction so pressure calculations
         # use the post-compression count, not the stale pre-compression one.
-        _compressed_est = (
-            estimate_tokens_rough(new_system_prompt)
-            + estimate_messages_tokens_rough(compressed)
+        # Use estimate_request_tokens_rough (not estimate_messages_tokens_rough)
+        # to include tools schema tokens — with 50+ tools enabled, schemas alone
+        # can add 20-30K tokens, and omitting them causes the next compression
+        # cycle to trigger much later than the configured threshold.
+        _compressed_est = estimate_request_tokens_rough(
+            compressed,
+            system_prompt=new_system_prompt or "",
+            tools=self.tools or None,
         )
         self.context_compressor.last_prompt_tokens = _compressed_est
         self.context_compressor.last_completion_tokens = 0