This commit is contained in:
devilardis 2026-04-24 19:24:57 -05:00 committed by GitHub
commit 3d5e1f5172
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 42 additions and 11 deletions

View file

@@ -297,6 +297,7 @@ class ContextCompressor(ContextEngine):
self._last_summary_error = None
self._last_compression_savings_pct = 100.0
self._ineffective_compression_count = 0
self._last_compression_time = 0.0
def update_model(
self,
@@ -318,6 +319,8 @@ class ContextCompressor(ContextEngine):
int(context_length * self.threshold_percent),
MINIMUM_CONTEXT_LENGTH,
)
if self.threshold_tokens >= context_length:
self.threshold_tokens = int(context_length * self.threshold_percent)
def __init__(
self,
@@ -354,10 +357,17 @@ class ContextCompressor(ContextEngine):
# the percentage would suggest a lower value. This prevents premature
# compression on large-context models at 50% while keeping the % sane
# for models right at the minimum.
# However, when context_length <= MINIMUM_CONTEXT_LENGTH the floor
# would make threshold >= 100% of context, which is unreachable — the
# API errors out before prompt_tokens can reach that value. In that
# case fall back to the percentage-based value so compression can
# actually trigger.
self.threshold_tokens = max(
int(self.context_length * threshold_percent),
MINIMUM_CONTEXT_LENGTH,
)
if self.threshold_tokens >= self.context_length:
self.threshold_tokens = int(self.context_length * threshold_percent)
self.compression_count = 0
# Derive token budgets: ratio is relative to the threshold, not total context
@@ -389,6 +399,8 @@ class ContextCompressor(ContextEngine):
# Anti-thrashing: track whether last compression was effective
self._last_compression_savings_pct: float = 100.0
self._ineffective_compression_count: int = 0
self._last_compression_time: float = 0.0
self._ANTI_THRASH_RECOVERY_SECONDS: float = 300.0
self._summary_failure_cooldown_until: float = 0.0
self._last_summary_error: Optional[str] = None
@@ -408,15 +420,28 @@ class ContextCompressor(ContextEngine):
if tokens < self.threshold_tokens:
return False
# Anti-thrashing: back off if recent compressions were ineffective
# Auto-recovery: if enough time has passed since the last compression
# attempt, reset the counter. Without this, a session that had two
# ineffective compressions early on will never auto-compress again,
# even as the context grows far beyond the threshold.
if self._ineffective_compression_count >= 2:
if not self.quiet_mode:
logger.warning(
"Compression skipped — last %d compressions saved <10%% each. "
"Consider /new to start a fresh session, or /compress <topic> "
"for focused compression.",
self._ineffective_compression_count,
)
return False
_elapsed = time.monotonic() - self._last_compression_time
if _elapsed > self._ANTI_THRASH_RECOVERY_SECONDS:
self._ineffective_compression_count = 0
if not self.quiet_mode:
logger.info(
"Anti-thrashing reset: %.0fs since last compression attempt",
_elapsed,
)
else:
if not self.quiet_mode:
logger.warning(
"Compression skipped — last %d compressions saved <10%% each. "
"Consider /new to start a fresh session, or /compress <topic> "
"for focused compression.",
self._ineffective_compression_count,
)
return False
return True
# ------------------------------------------------------------------
@@ -1281,6 +1306,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Anti-thrashing: track compression effectiveness
savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
self._last_compression_savings_pct = savings_pct
self._last_compression_time = time.monotonic()
if savings_pct < 10:
self._ineffective_compression_count += 1
else:

View file

@@ -8248,9 +8248,14 @@ class AIAgent:
# Update token estimate after compaction so pressure calculations
# use the post-compression count, not the stale pre-compression one.
_compressed_est = (
estimate_tokens_rough(new_system_prompt)
+ estimate_messages_tokens_rough(compressed)
# Use estimate_request_tokens_rough (not estimate_messages_tokens_rough)
# to include tools schema tokens — with 50+ tools enabled, schemas alone
# can add 20-30K tokens, and omitting them causes the next compression
# cycle to trigger much later than the configured threshold.
_compressed_est = estimate_request_tokens_rough(
compressed,
system_prompt=new_system_prompt or "",
tools=self.tools or None,
)
self.context_compressor.last_prompt_tokens = _compressed_est
self.context_compressor.last_completion_tokens = 0