mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(agent): recompute compressor budgets on context updates
This commit is contained in:
parent
89389f1894
commit
1484a559fe
2 changed files with 24 additions and 28 deletions
|
|
@@ -288,6 +288,23 @@ class ContextCompressor(ContextEngine):
|
|||
def name(self) -> str:
|
||||
return "compressor"
|
||||
|
||||
def _recompute_thresholds_and_budgets(self) -> None:
|
||||
"""Rebuild derived token thresholds after a context/model change."""
|
||||
self.threshold_tokens = max(
|
||||
int(self.context_length * self.threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
# Threshold must stay below context_length so compression can trigger
|
||||
# before the provider rejects the request outright.
|
||||
self.threshold_tokens = min(
|
||||
self.threshold_tokens, int(self.context_length * 0.95)
|
||||
)
|
||||
target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
|
||||
self.tail_token_budget = target_tokens
|
||||
self.max_summary_tokens = min(
|
||||
int(self.context_length * 0.05), _SUMMARY_TOKENS_CEILING,
|
||||
)
|
||||
|
||||
def on_session_reset(self) -> None:
|
||||
"""Reset all per-session state for /new or /reset."""
|
||||
super().on_session_reset()
|
||||
|
|
@@ -314,16 +331,7 @@ class ContextCompressor(ContextEngine):
|
|||
self.provider = provider
|
||||
self.api_mode = api_mode
|
||||
self.context_length = context_length
|
||||
self.threshold_tokens = max(
|
||||
int(context_length * self.threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
# Clamp: threshold must stay below context_length so compression can
|
||||
# actually trigger before the API rejects the request. The 95% cap
|
||||
# gives 5% headroom — enough to catch the threshold in time.
|
||||
self.threshold_tokens = min(
|
||||
self.threshold_tokens, int(context_length * 0.95)
|
||||
)
|
||||
self._recompute_thresholds_and_budgets()
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@@ -360,25 +368,9 @@ class ContextCompressor(ContextEngine):
|
|||
# the percentage would suggest a lower value. This prevents premature
|
||||
# compression on large-context models at 50% while keeping the % sane
|
||||
# for models right at the minimum.
|
||||
self.threshold_tokens = max(
|
||||
int(self.context_length * threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
# Clamp: threshold must stay below context_length so compression can
|
||||
# actually trigger before the API rejects the request. The 95% cap
|
||||
# gives 5% headroom — enough to catch the threshold in time.
|
||||
self.threshold_tokens = min(
|
||||
self.threshold_tokens, int(self.context_length * 0.95)
|
||||
)
|
||||
self._recompute_thresholds_and_budgets()
|
||||
self.compression_count = 0
|
||||
|
||||
# Derive token budgets: ratio is relative to the threshold, not total context
|
||||
target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
|
||||
self.tail_token_budget = target_tokens
|
||||
self.max_summary_tokens = min(
|
||||
int(self.context_length * 0.05), _SUMMARY_TOKENS_CEILING,
|
||||
)
|
||||
|
||||
if not quiet_mode:
|
||||
logger.info(
|
||||
"Context compressor initialized: model=%s context_length=%d "
|
||||
|
|
|
|||
|
|
@@ -998,7 +998,7 @@ class TestThresholdClampAtMinimumContext:
|
|||
assert c.should_compress(c.threshold_tokens) is True
|
||||
|
||||
def test_update_model_threshold_below_context_length(self):
|
||||
"""update_model() must also clamp the threshold."""
|
||||
"""update_model() must also rebuild all derived budgets."""
|
||||
with patch(
|
||||
"agent.context_compressor.get_model_context_length",
|
||||
return_value=200_000,
|
||||
|
|
@@ -1010,6 +1010,8 @@ class TestThresholdClampAtMinimumContext:
|
|||
)
|
||||
assert c.threshold_tokens < MINIMUM_CONTEXT_LENGTH
|
||||
assert c.threshold_tokens == int(MINIMUM_CONTEXT_LENGTH * 0.95)
|
||||
assert c.tail_token_budget == int(c.threshold_tokens * c.summary_target_ratio)
|
||||
assert c.max_summary_tokens == int(MINIMUM_CONTEXT_LENGTH * 0.05)
|
||||
assert c.should_compress(c.threshold_tokens - 1) is False
|
||||
assert c.should_compress(c.threshold_tokens) is True
|
||||
|
||||
|
|
@@ -1022,3 +1024,5 @@ class TestThresholdClampAtMinimumContext:
|
|||
c = ContextCompressor(model="test", quiet_mode=True)
|
||||
# 50% of 200K = 100K, 95% of 200K = 190K => min(100K, 190K) = 100K
|
||||
assert c.threshold_tokens == 100_000
|
||||
assert c.tail_token_budget == int(100_000 * c.summary_target_ratio)
|
||||
assert c.max_summary_tokens == 10_000
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue