diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index ef40cbfaf..22b3c4af3 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -288,6 +288,23 @@ class ContextCompressor(ContextEngine):
     def name(self) -> str:
         return "compressor"
 
+    def _recompute_thresholds_and_budgets(self) -> None:
+        """Re-derive threshold and budget attributes from ``context_length``.
+
+        The trigger threshold is floored at MINIMUM_CONTEXT_LENGTH and then
+        capped at 95% of the window, so compression can still fire before
+        the provider rejects the request outright.
+        """
+        window = self.context_length
+        floored = max(int(window * self.threshold_percent), MINIMUM_CONTEXT_LENGTH)
+        self.threshold_tokens = min(floored, int(window * 0.95))
+        # The tail budget is a ratio of the threshold, not of the full window.
+        self.tail_token_budget = int(
+            self.threshold_tokens * self.summary_target_ratio
+        )
+        summary_cap = int(window * 0.05)
+        self.max_summary_tokens = min(summary_cap, _SUMMARY_TOKENS_CEILING)
+
     def on_session_reset(self) -> None:
         """Reset all per-session state for /new or /reset."""
         super().on_session_reset()
@@ -314,10 +331,7 @@ class ContextCompressor(ContextEngine):
         self.provider = provider
         self.api_mode = api_mode
         self.context_length = context_length
-        self.threshold_tokens = max(
-            int(context_length * self.threshold_percent),
-            MINIMUM_CONTEXT_LENGTH,
-        )
+        self._recompute_thresholds_and_budgets()
 
     def __init__(
         self,
@@ -354,19 +368,9 @@ class ContextCompressor(ContextEngine):
         # the percentage would suggest a lower value.  This prevents premature
         # compression on large-context models at 50% while keeping the % sane
         # for models right at the minimum.
-        self.threshold_tokens = max(
-            int(self.context_length * threshold_percent),
-            MINIMUM_CONTEXT_LENGTH,
-        )
+        self._recompute_thresholds_and_budgets()
         self.compression_count = 0
 
-        # Derive token budgets: ratio is relative to the threshold, not total context
-        target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
-        self.tail_token_budget = target_tokens
-        self.max_summary_tokens = min(
-            int(self.context_length * 0.05), _SUMMARY_TOKENS_CEILING,
-        )
-
         if not quiet_mode:
             logger.info(
                 "Context compressor initialized: model=%s context_length=%d "
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 8072a58d9..a9fa8e423 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -4,6 +4,7 @@ import pytest
 from unittest.mock import patch, MagicMock
 
 from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX
+from agent.model_metadata import MINIMUM_CONTEXT_LENGTH
 
 
 @pytest.fixture()
@@ -969,3 +970,59 @@ class TestTruncateToolCallArgsJson:
         parsed = _json.loads(shrunk)
         assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
         assert parsed["content"].endswith("...[truncated]")
+
+class TestThresholdClampAtMinimumContext:
+    """Regression coverage for #14690.
+
+    With context_length == MINIMUM_CONTEXT_LENGTH the max() floor raised
+    threshold_tokens to 100% of the window, so the API errored before
+    auto-compression could trigger.  threshold_tokens is now clamped to at
+    most 95% of context_length.
+    """
+
+    @staticmethod
+    def _build(context_length):
+        """Return a quiet compressor whose model lookup yields *context_length*."""
+        with patch(
+            "agent.context_compressor.get_model_context_length",
+            return_value=context_length,
+        ):
+            return ContextCompressor(model="test", quiet_mode=True)
+
+    def test_init_threshold_below_context_length(self):
+        """__init__ must produce a threshold strictly below context_length."""
+        comp = self._build(MINIMUM_CONTEXT_LENGTH)
+        expected = int(MINIMUM_CONTEXT_LENGTH * 0.95)
+        assert comp.threshold_tokens < comp.context_length
+        assert comp.threshold_tokens == expected
+
+    def test_should_compress_at_threshold(self):
+        """should_compress returns True at the threshold and False just below."""
+        comp = self._build(MINIMUM_CONTEXT_LENGTH)
+        # The boundary is inclusive: exactly threshold_tokens already triggers.
+        assert comp.should_compress(comp.threshold_tokens - 1) is False
+        assert comp.should_compress(comp.threshold_tokens) is True
+
+    def test_update_model_threshold_below_context_length(self):
+        """update_model() must also rebuild all derived budgets."""
+        comp = self._build(200_000)
+        # Switching to a minimum-size model must re-derive every dependent value.
+        comp.update_model(
+            model="small/model",
+            context_length=MINIMUM_CONTEXT_LENGTH,
+        )
+        limit = comp.threshold_tokens
+        assert limit < MINIMUM_CONTEXT_LENGTH
+        assert limit == int(MINIMUM_CONTEXT_LENGTH * 0.95)
+        assert comp.tail_token_budget == int(limit * comp.summary_target_ratio)
+        assert comp.max_summary_tokens == int(MINIMUM_CONTEXT_LENGTH * 0.05)
+        assert comp.should_compress(comp.threshold_tokens - 1) is False
+        assert comp.should_compress(comp.threshold_tokens) is True
+
+    def test_large_context_unaffected_by_clamp(self):
+        """For large models the 95% cap does not change the threshold."""
+        comp = self._build(200_000)
+        # 50% of 200K = 100K, 95% of 200K = 190K => min(100K, 190K) = 100K
+        assert comp.threshold_tokens == 100_000
+        assert comp.tail_token_budget == int(100_000 * comp.summary_target_ratio)
+        assert comp.max_summary_tokens == 10_000