mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(compression): three bugs causing auto-compression to never trigger
1. MINIMUM_CONTEXT_LENGTH floor makes threshold=100% when context_length==64000
- When context_length is at or below MINIMUM_CONTEXT_LENGTH (64000), the
floor value in the threshold_tokens calculation dominates, making the
threshold 100% of the context window or more. The API errors out before
prompt_tokens can reach that value, so compression never fires.
- Fix: fall back to percentage-based value when floor >= context_length.
- Closes #14690
2. Anti-thrashing protection permanently disables compression with no recovery
- After 2 consecutive ineffective compressions (<10% savings each),
should_compress() returns False forever. No timeout, decay, or
auto-recovery mechanism exists — only /new resets the counter.
- Fix: add time-based auto-recovery. If more than 300s have passed
since the last compression attempt, reset the counter.
- Closes #14694
3. Post-compression token estimate excludes tools schema
- After compression, last_prompt_tokens is set using
estimate_messages_tokens_rough(), which omits tools schema tokens
(20-30K with 50+ tools). The stored estimate is therefore too low, so
the next compression cycle triggers much later than the configured
threshold.
- Fix: use estimate_request_tokens_rough() which includes tools schema,
consistent with the preflight compression check pattern.
- Closes #14695
This commit is contained in:
parent
ce089169d5
commit
c1c62b6eef
2 changed files with 42 additions and 11 deletions
|
|
@ -296,6 +296,7 @@ class ContextCompressor(ContextEngine):
|
||||||
self._previous_summary = None
|
self._previous_summary = None
|
||||||
self._last_compression_savings_pct = 100.0
|
self._last_compression_savings_pct = 100.0
|
||||||
self._ineffective_compression_count = 0
|
self._ineffective_compression_count = 0
|
||||||
|
self._last_compression_time = 0.0
|
||||||
|
|
||||||
def update_model(
|
def update_model(
|
||||||
self,
|
self,
|
||||||
|
|
@ -317,6 +318,8 @@ class ContextCompressor(ContextEngine):
|
||||||
int(context_length * self.threshold_percent),
|
int(context_length * self.threshold_percent),
|
||||||
MINIMUM_CONTEXT_LENGTH,
|
MINIMUM_CONTEXT_LENGTH,
|
||||||
)
|
)
|
||||||
|
if self.threshold_tokens >= context_length:
|
||||||
|
self.threshold_tokens = int(context_length * self.threshold_percent)
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|
@ -353,10 +356,17 @@ class ContextCompressor(ContextEngine):
|
||||||
# the percentage would suggest a lower value. This prevents premature
|
# the percentage would suggest a lower value. This prevents premature
|
||||||
# compression on large-context models at 50% while keeping the % sane
|
# compression on large-context models at 50% while keeping the % sane
|
||||||
# for models right at the minimum.
|
# for models right at the minimum.
|
||||||
|
# However, when context_length <= MINIMUM_CONTEXT_LENGTH the floor
|
||||||
|
# would make threshold >= 100% of context, which is unreachable — the
|
||||||
|
# API errors out before prompt_tokens can reach that value. In that
|
||||||
|
# case fall back to the percentage-based value so compression can
|
||||||
|
# actually trigger.
|
||||||
self.threshold_tokens = max(
|
self.threshold_tokens = max(
|
||||||
int(self.context_length * threshold_percent),
|
int(self.context_length * threshold_percent),
|
||||||
MINIMUM_CONTEXT_LENGTH,
|
MINIMUM_CONTEXT_LENGTH,
|
||||||
)
|
)
|
||||||
|
if self.threshold_tokens >= self.context_length:
|
||||||
|
self.threshold_tokens = int(self.context_length * threshold_percent)
|
||||||
self.compression_count = 0
|
self.compression_count = 0
|
||||||
|
|
||||||
# Derive token budgets: ratio is relative to the threshold, not total context
|
# Derive token budgets: ratio is relative to the threshold, not total context
|
||||||
|
|
@ -388,6 +398,8 @@ class ContextCompressor(ContextEngine):
|
||||||
# Anti-thrashing: track whether last compression was effective
|
# Anti-thrashing: track whether last compression was effective
|
||||||
self._last_compression_savings_pct: float = 100.0
|
self._last_compression_savings_pct: float = 100.0
|
||||||
self._ineffective_compression_count: int = 0
|
self._ineffective_compression_count: int = 0
|
||||||
|
self._last_compression_time: float = 0.0
|
||||||
|
self._ANTI_THRASH_RECOVERY_SECONDS: float = 300.0
|
||||||
self._summary_failure_cooldown_until: float = 0.0
|
self._summary_failure_cooldown_until: float = 0.0
|
||||||
|
|
||||||
def update_from_response(self, usage: Dict[str, Any]):
|
def update_from_response(self, usage: Dict[str, Any]):
|
||||||
|
|
@ -406,15 +418,28 @@ class ContextCompressor(ContextEngine):
|
||||||
if tokens < self.threshold_tokens:
|
if tokens < self.threshold_tokens:
|
||||||
return False
|
return False
|
||||||
# Anti-thrashing: back off if recent compressions were ineffective
|
# Anti-thrashing: back off if recent compressions were ineffective
|
||||||
|
# Auto-recovery: if enough time has passed since the last compression
|
||||||
|
# attempt, reset the counter. Without this, a session that had two
|
||||||
|
# ineffective compressions early on will never auto-compress again,
|
||||||
|
# even as the context grows far beyond the threshold.
|
||||||
if self._ineffective_compression_count >= 2:
|
if self._ineffective_compression_count >= 2:
|
||||||
if not self.quiet_mode:
|
_elapsed = time.monotonic() - self._last_compression_time
|
||||||
logger.warning(
|
if _elapsed > self._ANTI_THRASH_RECOVERY_SECONDS:
|
||||||
"Compression skipped — last %d compressions saved <10%% each. "
|
self._ineffective_compression_count = 0
|
||||||
"Consider /new to start a fresh session, or /compress <topic> "
|
if not self.quiet_mode:
|
||||||
"for focused compression.",
|
logger.info(
|
||||||
self._ineffective_compression_count,
|
"Anti-thrashing reset: %.0fs since last compression attempt",
|
||||||
)
|
_elapsed,
|
||||||
return False
|
)
|
||||||
|
else:
|
||||||
|
if not self.quiet_mode:
|
||||||
|
logger.warning(
|
||||||
|
"Compression skipped — last %d compressions saved <10%% each. "
|
||||||
|
"Consider /new to start a fresh session, or /compress <topic> "
|
||||||
|
"for focused compression.",
|
||||||
|
self._ineffective_compression_count,
|
||||||
|
)
|
||||||
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
@ -1258,6 +1283,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||||
# Anti-thrashing: track compression effectiveness
|
# Anti-thrashing: track compression effectiveness
|
||||||
savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
|
savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
|
||||||
self._last_compression_savings_pct = savings_pct
|
self._last_compression_savings_pct = savings_pct
|
||||||
|
self._last_compression_time = time.monotonic()
|
||||||
if savings_pct < 10:
|
if savings_pct < 10:
|
||||||
self._ineffective_compression_count += 1
|
self._ineffective_compression_count += 1
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
11
run_agent.py
11
run_agent.py
|
|
@ -7595,9 +7595,14 @@ class AIAgent:
|
||||||
|
|
||||||
# Update token estimate after compaction so pressure calculations
|
# Update token estimate after compaction so pressure calculations
|
||||||
# use the post-compression count, not the stale pre-compression one.
|
# use the post-compression count, not the stale pre-compression one.
|
||||||
_compressed_est = (
|
# Use estimate_request_tokens_rough (not estimate_messages_tokens_rough)
|
||||||
estimate_tokens_rough(new_system_prompt)
|
# to include tools schema tokens — with 50+ tools enabled, schemas alone
|
||||||
+ estimate_messages_tokens_rough(compressed)
|
# can add 20-30K tokens, and omitting them causes the next compression
|
||||||
|
# cycle to trigger much later than the configured threshold.
|
||||||
|
_compressed_est = estimate_request_tokens_rough(
|
||||||
|
compressed,
|
||||||
|
system_prompt=new_system_prompt or "",
|
||||||
|
tools=self.tools or None,
|
||||||
)
|
)
|
||||||
self.context_compressor.last_prompt_tokens = _compressed_est
|
self.context_compressor.last_prompt_tokens = _compressed_est
|
||||||
self.context_compressor.last_completion_tokens = 0
|
self.context_compressor.last_completion_tokens = 0
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue