mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
fix(agent): clamp compression threshold below context_length to ensure compression can trigger (#14690)
When context_length equals MINIMUM_CONTEXT_LENGTH (64000), the max() floor in the threshold calculation pushes threshold_tokens to 100% of the context window. Auto-compression can then never trigger, because the API rejects the request before the token count reaches the threshold. Add a min() clamp after the max() floor so that the threshold never exceeds 95% of context_length, leaving 5% headroom for compression to trigger. The clamp is applied in both __init__ and update_model.
This commit is contained in:
parent
00c3d848d8
commit
89389f1894
2 changed files with 65 additions and 0 deletions
|
|
@@ -318,6 +318,12 @@ class ContextCompressor(ContextEngine):
|
|||
int(context_length * self.threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
# Clamp: threshold must stay below context_length so compression can
|
||||
# actually trigger before the API rejects the request. The 95% cap
|
||||
# gives 5% headroom — enough to catch the threshold in time.
|
||||
self.threshold_tokens = min(
|
||||
self.threshold_tokens, int(context_length * 0.95)
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@@ -358,6 +364,12 @@ class ContextCompressor(ContextEngine):
|
|||
int(self.context_length * threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
# Clamp: threshold must stay below context_length so compression can
|
||||
# actually trigger before the API rejects the request. The 95% cap
|
||||
# gives 5% headroom — enough to catch the threshold in time.
|
||||
self.threshold_tokens = min(
|
||||
self.threshold_tokens, int(self.context_length * 0.95)
|
||||
)
|
||||
self.compression_count = 0
|
||||
|
||||
# Derive token budgets: ratio is relative to the threshold, not total context
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue