Merge c1c62b6eef into 05d8f11085

2026-04-25 00:51:20 +00:00 · 2026-04-24 19:24:57 -05:00 · 2026-04-24 19:24:57 -05:00 · 3d5e1f5172
commit 3d5e1f5172
parent 05d8f11085 c1c62b6eef
2 changed files with 42 additions and 11 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -297,6 +297,7 @@ class ContextCompressor(ContextEngine):
        self._last_summary_error = None
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0
+        self._last_compression_time = 0.0

    def update_model(
        self,
@@ -318,6 +319,8 @@ class ContextCompressor(ContextEngine):
            int(context_length * self.threshold_percent),
            MINIMUM_CONTEXT_LENGTH,
        )
+        if self.threshold_tokens >= context_length:
+            self.threshold_tokens = int(context_length * self.threshold_percent)

    def __init__(
        self,
@@ -354,10 +357,17 @@ class ContextCompressor(ContextEngine):
        # the percentage would suggest a lower value.  This prevents premature
        # compression on large-context models at 50% while keeping the % sane
        # for models right at the minimum.
+        # However, when context_length <= MINIMUM_CONTEXT_LENGTH the floor
+        # would make threshold >= 100% of context, which is unreachable — the
+        # API errors out before prompt_tokens can reach that value.  In that
+        # case fall back to the percentage-based value so compression can
+        # actually trigger.
        self.threshold_tokens = max(
            int(self.context_length * threshold_percent),
            MINIMUM_CONTEXT_LENGTH,
        )
+        if self.threshold_tokens >= self.context_length:
+            self.threshold_tokens = int(self.context_length * threshold_percent)
        self.compression_count = 0

        # Derive token budgets: ratio is relative to the threshold, not total context
@@ -389,6 +399,8 @@ class ContextCompressor(ContextEngine):
        # Anti-thrashing: track whether last compression was effective
        self._last_compression_savings_pct: float = 100.0
        self._ineffective_compression_count: int = 0
+        self._last_compression_time: float = 0.0
+        self._ANTI_THRASH_RECOVERY_SECONDS: float = 300.0
        self._summary_failure_cooldown_until: float = 0.0
        self._last_summary_error: Optional[str] = None

@@ -408,15 +420,28 @@ class ContextCompressor(ContextEngine):
        if tokens < self.threshold_tokens:
            return False
        # Anti-thrashing: back off if recent compressions were ineffective
+        # Auto-recovery: if enough time has passed since the last compression
+        # attempt, reset the counter.  Without this, a session that had two
+        # ineffective compressions early on will never auto-compress again,
+        # even as the context grows far beyond the threshold.
        if self._ineffective_compression_count >= 2:
-            if not self.quiet_mode:
-                logger.warning(
-                    "Compression skipped — last %d compressions saved <10%% each. "
-                    "Consider /new to start a fresh session, or /compress <topic> "
-                    "for focused compression.",
-                    self._ineffective_compression_count,
-                )
-            return False
+            _elapsed = time.monotonic() - self._last_compression_time
+            if _elapsed > self._ANTI_THRASH_RECOVERY_SECONDS:
+                self._ineffective_compression_count = 0
+                if not self.quiet_mode:
+                    logger.info(
+                        "Anti-thrashing reset: %.0fs since last compression attempt",
+                        _elapsed,
+                    )
+            else:
+                if not self.quiet_mode:
+                    logger.warning(
+                        "Compression skipped — last %d compressions saved <10%% each. "
+                        "Consider /new to start a fresh session, or /compress <topic> "
+                        "for focused compression.",
+                        self._ineffective_compression_count,
+                    )
+                return False
        return True

    # ------------------------------------------------------------------
@@ -1281,6 +1306,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        # Anti-thrashing: track compression effectiveness
        savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
        self._last_compression_savings_pct = savings_pct
+        self._last_compression_time = time.monotonic()
        if savings_pct < 10:
            self._ineffective_compression_count += 1
        else:
--- a/run_agent.py
+++ b/run_agent.py
@@ -8248,9 +8248,14 @@ class AIAgent:

        # Update token estimate after compaction so pressure calculations
        # use the post-compression count, not the stale pre-compression one.
-        _compressed_est = (
-            estimate_tokens_rough(new_system_prompt)
-            + estimate_messages_tokens_rough(compressed)
+        # Use estimate_request_tokens_rough (not estimate_messages_tokens_rough)
+        # to include tools schema tokens — with 50+ tools enabled, schemas alone
+        # can add 20-30K tokens, and omitting them causes the next compression
+        # cycle to trigger much later than the configured threshold.
+        _compressed_est = estimate_request_tokens_rough(
+            compressed,
+            system_prompt=new_system_prompt or "",
+            tools=self.tools or None,
        )
        self.context_compressor.last_prompt_tokens = _compressed_est
        self.context_compressor.last_completion_tokens = 0