fix(agent): clamp compression threshold below context_length to ensure compression can trigger (#14690)

When context_length equals MINIMUM_CONTEXT_LENGTH (64000), the max() floor in the threshold calculation pushed threshold_tokens to 100% of the context window, making auto-compression impossible because the API would reject the request before the token count ever reached the threshold. Add a min() clamp after the max() floor so that the threshold never exceeds 95% of context_length, leaving 5% headroom for compression to trigger. Applied to both __init__ and update_model.
2026-05-07 02:51:50 +00:00 · 2026-04-25 08:55:23 +10:00 · 2026-04-25 08:55:23 +10:00 · 89389f1894
commit 89389f1894
parent 00c3d848d8
2 changed files with 65 additions and 0 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -318,6 +318,12 @@ class ContextCompressor(ContextEngine):
            int(context_length * self.threshold_percent),
            MINIMUM_CONTEXT_LENGTH,
        )
+        # Clamp: threshold must stay below context_length so compression can
+        # actually trigger before the API rejects the request.  The 95% cap
+        # gives 5% headroom — enough to catch the threshold in time.
+        self.threshold_tokens = min(
+            self.threshold_tokens, int(context_length * 0.95)
+        )

    def __init__(
        self,
@@ -358,6 +364,12 @@ class ContextCompressor(ContextEngine):
            int(self.context_length * threshold_percent),
            MINIMUM_CONTEXT_LENGTH,
        )
+        # Clamp: threshold must stay below context_length so compression can
+        # actually trigger before the API rejects the request.  The 95% cap
+        # gives 5% headroom — enough to catch the threshold in time.
+        self.threshold_tokens = min(
+            self.threshold_tokens, int(self.context_length * 0.95)
+        )
        self.compression_count = 0

        # Derive token budgets: ratio is relative to the threshold, not total context