mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge c1c62b6eef into 05d8f11085
This commit is contained in:
commit
3d5e1f5172
2 changed files with 42 additions and 11 deletions
|
|
@ -297,6 +297,7 @@ class ContextCompressor(ContextEngine):
|
|||
self._last_summary_error = None
|
||||
self._last_compression_savings_pct = 100.0
|
||||
self._ineffective_compression_count = 0
|
||||
self._last_compression_time = 0.0
|
||||
|
||||
def update_model(
|
||||
self,
|
||||
|
|
@ -318,6 +319,8 @@ class ContextCompressor(ContextEngine):
|
|||
int(context_length * self.threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
if self.threshold_tokens >= context_length:
|
||||
self.threshold_tokens = int(context_length * self.threshold_percent)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -354,10 +357,17 @@ class ContextCompressor(ContextEngine):
|
|||
# the percentage would suggest a lower value. This prevents premature
|
||||
# compression on large-context models at 50% while keeping the % sane
|
||||
# for models right at the minimum.
|
||||
# However, when context_length <= MINIMUM_CONTEXT_LENGTH the floor
|
||||
# would make threshold >= 100% of context, which is unreachable — the
|
||||
# API errors out before prompt_tokens can reach that value. In that
|
||||
# case fall back to the percentage-based value so compression can
|
||||
# actually trigger.
|
||||
self.threshold_tokens = max(
|
||||
int(self.context_length * threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
if self.threshold_tokens >= self.context_length:
|
||||
self.threshold_tokens = int(self.context_length * threshold_percent)
|
||||
self.compression_count = 0
|
||||
|
||||
# Derive token budgets: ratio is relative to the threshold, not total context
|
||||
|
|
@ -389,6 +399,8 @@ class ContextCompressor(ContextEngine):
|
|||
# Anti-thrashing: track whether last compression was effective
|
||||
self._last_compression_savings_pct: float = 100.0
|
||||
self._ineffective_compression_count: int = 0
|
||||
self._last_compression_time: float = 0.0
|
||||
self._ANTI_THRASH_RECOVERY_SECONDS: float = 300.0
|
||||
self._summary_failure_cooldown_until: float = 0.0
|
||||
self._last_summary_error: Optional[str] = None
|
||||
|
||||
|
|
@ -408,15 +420,28 @@ class ContextCompressor(ContextEngine):
|
|||
if tokens < self.threshold_tokens:
|
||||
return False
|
||||
# Anti-thrashing: back off if recent compressions were ineffective
|
||||
# Auto-recovery: if enough time has passed since the last compression
|
||||
# attempt, reset the counter. Without this, a session that had two
|
||||
# ineffective compressions early on will never auto-compress again,
|
||||
# even as the context grows far beyond the threshold.
|
||||
if self._ineffective_compression_count >= 2:
|
||||
if not self.quiet_mode:
|
||||
logger.warning(
|
||||
"Compression skipped — last %d compressions saved <10%% each. "
|
||||
"Consider /new to start a fresh session, or /compress <topic> "
|
||||
"for focused compression.",
|
||||
self._ineffective_compression_count,
|
||||
)
|
||||
return False
|
||||
_elapsed = time.monotonic() - self._last_compression_time
|
||||
if _elapsed > self._ANTI_THRASH_RECOVERY_SECONDS:
|
||||
self._ineffective_compression_count = 0
|
||||
if not self.quiet_mode:
|
||||
logger.info(
|
||||
"Anti-thrashing reset: %.0fs since last compression attempt",
|
||||
_elapsed,
|
||||
)
|
||||
else:
|
||||
if not self.quiet_mode:
|
||||
logger.warning(
|
||||
"Compression skipped — last %d compressions saved <10%% each. "
|
||||
"Consider /new to start a fresh session, or /compress <topic> "
|
||||
"for focused compression.",
|
||||
self._ineffective_compression_count,
|
||||
)
|
||||
return False
|
||||
return True
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
|
@ -1281,6 +1306,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
# Anti-thrashing: track compression effectiveness
|
||||
savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
|
||||
self._last_compression_savings_pct = savings_pct
|
||||
self._last_compression_time = time.monotonic()
|
||||
if savings_pct < 10:
|
||||
self._ineffective_compression_count += 1
|
||||
else:
|
||||
|
|
|
|||
11
run_agent.py
11
run_agent.py
|
|
@ -8248,9 +8248,14 @@ class AIAgent:
|
|||
|
||||
# Update token estimate after compaction so pressure calculations
|
||||
# use the post-compression count, not the stale pre-compression one.
|
||||
_compressed_est = (
|
||||
estimate_tokens_rough(new_system_prompt)
|
||||
+ estimate_messages_tokens_rough(compressed)
|
||||
# Use estimate_request_tokens_rough (not estimate_messages_tokens_rough)
|
||||
# to include tools schema tokens — with 50+ tools enabled, schemas alone
|
||||
# can add 20-30K tokens, and omitting them causes the next compression
|
||||
# cycle to trigger much later than the configured threshold.
|
||||
_compressed_est = estimate_request_tokens_rough(
|
||||
compressed,
|
||||
system_prompt=new_system_prompt or "",
|
||||
tools=self.tools or None,
|
||||
)
|
||||
self.context_compressor.last_prompt_tokens = _compressed_est
|
||||
self.context_compressor.last_completion_tokens = 0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue