fix(agent): clamp compression threshold below context_length to ensure compression can trigger (#14690)

When context_length equals MINIMUM_CONTEXT_LENGTH (64000), the max()
floor in the threshold calculation pushed threshold_tokens to 100% of the
context window, making auto-compression impossible: the API would reject
the request before the token count ever reached the threshold. Add a min()
clamp after the max() floor so that the threshold never exceeds 95% of
context_length, leaving 5% headroom for compression to trigger. Applied in
both __init__ and update_model.
This commit is contained in:
Tranquil-Flow 2026-04-25 08:55:23 +10:00
parent 00c3d848d8
commit 89389f1894
2 changed files with 65 additions and 0 deletions

View file

@ -318,6 +318,12 @@ class ContextCompressor(ContextEngine):
int(context_length * self.threshold_percent),
MINIMUM_CONTEXT_LENGTH,
)
# Clamp: threshold must stay below context_length so compression can
# actually trigger before the API rejects the request. The 95% cap
# gives 5% headroom — enough to catch the threshold in time.
self.threshold_tokens = min(
self.threshold_tokens, int(context_length * 0.95)
)
def __init__(
self,
@ -358,6 +364,12 @@ class ContextCompressor(ContextEngine):
int(self.context_length * threshold_percent),
MINIMUM_CONTEXT_LENGTH,
)
# Clamp: threshold must stay below context_length so compression can
# actually trigger before the API rejects the request. The 95% cap
# gives 5% headroom — enough to catch the threshold in time.
self.threshold_tokens = min(
self.threshold_tokens, int(self.context_length * 0.95)
)
self.compression_count = 0
# Derive token budgets: ratio is relative to the threshold, not total context