diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 70588940eda..2eb896a9934 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -656,9 +656,8 @@ class ContextCompressor(ContextEngine):
         self.provider = provider
         self.api_mode = api_mode
         self.context_length = context_length
-        self.threshold_tokens = max(
-            int(context_length * self.threshold_percent),
-            MINIMUM_CONTEXT_LENGTH,
+        self.threshold_tokens = self._compute_threshold_tokens(
+            context_length, self.threshold_percent
         )
         # Recalculate token budgets for the new context length so the
         # compressor stays calibrated after a model switch (e.g. 200K → 32K).
@@ -690,6 +689,40 @@ class ContextCompressor(ContextEngine):
         self.awaiting_real_usage_after_compression = False
         self._ineffective_compression_count = 0
 
+    # Fallback trigger ratio for small context windows. When the
+    # MINIMUM_CONTEXT_LENGTH floor meets or exceeds the window, falling back
+    # to the percentage (50% → 32K of a 64K window) would waste half the
+    # usable context. Trigger near the top of the window instead — same
+    # rationale as the gpt-5.5/Codex 85% autoraise.
+    _MIN_CTX_TRIGGER_RATIO = 0.85
+
+    @staticmethod
+    def _compute_threshold_tokens(context_length: int, threshold_percent: float) -> int:
+        """Compute the compaction trigger threshold in tokens.
+
+        The base value is ``context_length * threshold_percent``, floored at
+        ``MINIMUM_CONTEXT_LENGTH`` so large-context models don't compress
+        prematurely at 50%. That floor degenerates on small windows: at or
+        below the minimum (e.g. a 64000-token model) it equals or exceeds the
+        ENTIRE window, and just above it (e.g. 65536) it still sits at ~98% of
+        the window — the provider can reject the request before usage gets
+        that high, so auto-compression never fires (#14690).
+
+        When the threshold would reach the window, or the floor (rather than
+        the percentage) lifts it above ``_MIN_CTX_TRIGGER_RATIO`` (85%) of the
+        window, trigger at 85% of the window instead, clamped to
+        ``[1, context_length - 1]``. A non-positive ``context_length`` gets
+        the floored value unchanged.
+        """
+        pct_value = int(context_length * threshold_percent)
+        floored = max(pct_value, MINIMUM_CONTEXT_LENGTH)
+        ratio_value = int(context_length * ContextCompressor._MIN_CTX_TRIGGER_RATIO)
+        # The floor "wins" when the percentage alone would have been smaller.
+        floor_too_high = pct_value < MINIMUM_CONTEXT_LENGTH and floored > ratio_value
+        if context_length > 0 and (floored >= context_length or floor_too_high):
+            return max(1, min(ratio_value, context_length - 1))
+        return floored
+
     def __init__(
         self,
         model: str,
@@ -730,10 +763,11 @@ class ContextCompressor(ContextEngine):
         # Floor: never compress below MINIMUM_CONTEXT_LENGTH tokens even if
         # the percentage would suggest a lower value.  This prevents premature
         # compression on large-context models at 50% while keeping the % sane
-        # for models right at the minimum.
-        self.threshold_tokens = max(
-            int(self.context_length * threshold_percent),
-            MINIMUM_CONTEXT_LENGTH,
+        # for models right at the minimum. _compute_threshold_tokens also
+        # guards the degenerate case where the floor would equal/exceed the
+        # window (small models), so auto-compression can still fire (#14690).
+        self.threshold_tokens = self._compute_threshold_tokens(
+            self.context_length, threshold_percent
         )
         self.compression_count = 0
 
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 24b1c4cbe2b..084cb446b4d 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -204,6 +204,44 @@ class TestCompress:
             f"#49307), found {count}x:\n{summary}"
         )
 
+    def test_threshold_below_window_at_minimum_ctx(self):
+        """Regression for #14690: when context_length equals
+        MINIMUM_CONTEXT_LENGTH, the floored threshold used to span the whole
+        window, which meant auto-compression never fired. The trigger is now
+        85% of the window: late enough to use most of the small budget, yet
+        under 100% so it can actually fire."""
+        from agent.context_compressor import MINIMUM_CONTEXT_LENGTH
+        threshold = ContextCompressor._compute_threshold_tokens(MINIMUM_CONTEXT_LENGTH, 0.50)
+        assert threshold < MINIMUM_CONTEXT_LENGTH
+        assert threshold == 54400  # 85% of 64000
+
+    def test_threshold_below_window_for_small_ctx(self):
+        """A 32K window sits under the 64000 floor, so the trigger is 85% of it."""
+        threshold = ContextCompressor._compute_threshold_tokens(32000, 0.50)
+        assert threshold < 32000
+        assert threshold == 27200  # 85% of 32000
+
+    def test_threshold_floored_for_large_ctx(self):
+        """Large windows keep the plain max(percentage, floor) threshold."""
+        from agent.context_compressor import MINIMUM_CONTEXT_LENGTH
+        compute = ContextCompressor._compute_threshold_tokens
+        assert compute(200000, 0.50) == 100000  # 50% already clears the floor
+        assert compute(100000, 0.50) == MINIMUM_CONTEXT_LENGTH  # 50000 is raised to the floor
+
+    def test_minimum_ctx_model_can_actually_compress(self):
+        """End-to-end: a model at exactly the minimum context length must have
+        should_compress() fire below its window (at the 85% trigger), not only
+        at 100%."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=64000):
+            c = ContextCompressor(model="small-64k", quiet_mode=True)
+        # Check the values __init__ itself produced — re-assigning them here
+        # would let a regression in the constructor's wiring go unnoticed.
+        assert c.threshold_tokens == 54400
+        assert c.threshold_tokens < 64000
+        # At 85%+ usage compaction fires; below it, it doesn't (no premature compact).
+        assert c.should_compress(55000) is True
+        assert c.should_compress(40000) is False
+
     def test_compression_increments_count(self, compressor):
         msgs = self._make_messages(10)
         # Default config (abort_on_summary_failure=False) — fallback path