diff --git a/gateway/run.py b/gateway/run.py
index b7adadd3a..b4b6c6ef0 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1757,9 +1757,9 @@ class GatewayRunner:
         # Token source priority:
         # 1. Actual API-reported prompt_tokens from the last turn
         #    (stored in session_entry.last_prompt_tokens)
-        # 2. Rough char-based estimate (str(msg)//4) with a 1.4x
-        #    safety factor to account for overestimation on tool-heavy
-        #    conversations (code/JSON tokenizes at 5-7+ chars/token).
+        # 2. Rough char-based estimate (len(str(msg)) // 4). Overestimates
+        #    by 30-50% on code/JSON-heavy sessions, but that just
+        #    means hygiene fires a bit early — safe and harmless.
         # -----------------------------------------------------------------
         if history and len(history) >= 4:
             from agent.model_metadata import (
@@ -1845,29 +1845,20 @@ class GatewayRunner:
 
                 # Prefer actual API-reported tokens from the last turn
                 # (stored in session entry) over the rough char-based estimate.
-                # The rough estimate (str(msg)//4) overestimates by 30-50% on
-                # tool-heavy/code-heavy conversations, causing premature compression.
                 _stored_tokens = session_entry.last_prompt_tokens
                 if _stored_tokens > 0:
                     _approx_tokens = _stored_tokens
                     _token_source = "actual"
                 else:
                     _approx_tokens = estimate_messages_tokens_rough(history)
-                    # Apply safety factor only for rough estimates.
-                    # Cap the adjusted threshold at 95% of context length
-                    # so it never exceeds what the model can actually handle
-                    # (the 1.4x factor previously pushed the threshold above
-                    # the model's context limit for ~200K models like GLM-5).
-                    _max_safe_threshold = int(_hyg_context_length * 0.95)
-                    _compress_token_threshold = min(
-                        int(_compress_token_threshold * 1.4),
-                        _max_safe_threshold,
-                    )
-                    _warn_token_threshold = min(
-                        int(_warn_token_threshold * 1.4),
-                        _hyg_context_length,
-                    )
                     _token_source = "estimated"
+                    # Note: rough estimates overestimate by 30-50% for code/JSON-heavy
+                    # sessions, but that just means hygiene fires a bit early — which
+                    # is safe and harmless.  The 85% threshold already provides ample
+                    # headroom (agent's own compressor runs at 50%).  A previous 1.4x
+                    # multiplier tried to compensate by inflating the threshold, but
+                    # 85% * 1.4 = 119% of context — which exceeds the model's limit
+                    # and prevented hygiene from ever firing for ~200K models (GLM-5).
 
                 _needs_compress = _approx_tokens >= _compress_token_threshold
 
diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py
index 1fadd8dbc..80d249347 100644
--- a/tests/gateway/test_session_hygiene.py
+++ b/tests/gateway/test_session_hygiene.py
@@ -212,71 +212,59 @@ class TestSessionHygieneWarnThreshold:
         assert post_compress_tokens < warn_threshold
 
 
-class TestEstimatedTokenSafetyCap:
-    """Verify the 1.4x safety factor on rough estimates is capped at 95% of
-    context length, preventing the threshold from exceeding the model's
-    actual limit.
+class TestEstimatedTokenThreshold:
+    """Verify that hygiene thresholds are always below the model's context
+    limit — for both actual and estimated token counts.
 
-    Bug: For ~200K models (GLM-5-turbo), the uncapped 1.4x pushed the
-    threshold to 238K — above the model's limit — so hygiene never fired.
+    Regression: a previous 1.4x multiplier on rough estimates pushed the
+    threshold to 85% * 1.4 = 119% of context, which exceeded the model's
+    limit and prevented hygiene from ever firing for ~200K models (GLM-5).
+    The fix removed the multiplier entirely — the 85% threshold already
+    provides ample headroom over the agent's 50% compressor.
     """
 
-    def test_uncapped_14x_would_exceed_context(self):
-        """Without the cap, 200K * 0.85 * 1.4 = 238K > 200K (broken)."""
+    def test_threshold_below_context_for_200k_model(self):
+        """Hygiene threshold must always be below model context."""
         context_length = 200_000
-        threshold_pct = 0.85
-        raw_threshold = int(context_length * threshold_pct)  # 170K
-        uncapped = int(raw_threshold * 1.4)  # 238K
-        assert uncapped > context_length, (
-            "Uncapped 1.4x should exceed model context (this is the bug)"
-        )
+        threshold = int(context_length * 0.85)
+        assert threshold < context_length
 
-    def test_capped_14x_stays_within_context(self):
-        """With the cap, the threshold stays at 95% of context length."""
-        context_length = 200_000
-        threshold_pct = 0.85
-        raw_threshold = int(context_length * threshold_pct)  # 170K
-        max_safe = int(context_length * 0.95)  # 190K
-        capped = min(int(raw_threshold * 1.4), max_safe)
-        assert capped <= context_length, (
-            f"Capped threshold ({capped:,}) must not exceed context ({context_length:,})"
-        )
-        assert capped == max_safe, (
-            f"For 200K models, the cap should bind: expected {max_safe:,}, got {capped:,}"
-        )
-
-    def test_cap_does_not_affect_large_context_models(self):
-        """For 1M+ models the 1.4x factor stays below 95%, so cap is no-op."""
-        context_length = 1_000_000
-        threshold_pct = 0.85
-        raw_threshold = int(context_length * threshold_pct)  # 850K
-        max_safe = int(context_length * 0.95)  # 950K
-        uncapped = int(raw_threshold * 1.4)  # 1,190K — but that's > 950K
-        capped = min(uncapped, max_safe)
-        # For very large models the cap still applies but the resulting
-        # threshold (950K) is still large enough to prevent premature compression
-        assert capped <= context_length
-
-    def test_cap_for_128k_model(self):
-        """128K model: 128K * 0.85 * 1.4 = 152K — exceeds 128K, cap binds."""
+    def test_threshold_below_context_for_128k_model(self):
         context_length = 128_000
-        threshold_pct = 0.85
-        raw_threshold = int(context_length * threshold_pct)  # 108,800
-        max_safe = int(context_length * 0.95)  # 121,600
-        uncapped = int(raw_threshold * 1.4)  # 152,320
-        capped = min(uncapped, max_safe)
-        assert uncapped > context_length, "1.4x exceeds 128K context"
-        assert capped == max_safe, "Cap should bind for 128K models"
-        assert capped < context_length, "Capped value must be below context limit"
+        threshold = int(context_length * 0.85)
+        assert threshold < context_length
 
-    def test_warn_threshold_capped_at_context_length(self):
-        """Warn threshold (0.95 * 1.4) must be capped at context_length."""
+    def test_no_multiplier_means_same_threshold_for_estimated_and_actual(self):
+        """Without the 1.4x, estimated and actual token paths use the same threshold."""
         context_length = 200_000
-        raw_warn = int(context_length * 0.95)  # 190K
-        uncapped_warn = int(raw_warn * 1.4)  # 266K
-        capped_warn = min(uncapped_warn, context_length)
-        assert uncapped_warn > context_length
-        assert capped_warn == context_length
+        threshold_pct = 0.85
+        threshold = int(context_length * threshold_pct)
+        # Both paths should use 170K — no inflation
+        assert threshold == 170_000
+
+    def test_warn_threshold_below_context(self):
+        """Warn threshold (95%) must be below context length."""
+        for ctx in (128_000, 200_000, 1_000_000):
+            warn = int(ctx * 0.95)
+            assert warn < ctx
+
+    def test_overestimate_fires_early_but_safely(self):
+        """If rough estimate is 50% inflated, hygiene fires at ~57% actual usage.
+
+        That's between the agent's 50% threshold and the model's limit —
+        safe and harmless.
+        """
+        context_length = 200_000
+        threshold = int(context_length * 0.85)  # 170K
+        # If actual tokens ≈ 113K, the rough estimate is ≈ 113K * 1.5 ≈ 170K.
+        # Hygiene fires when the estimate hits 170K; actual is ~113K ≈ 57% of ctx.
+        actual_when_fires = threshold / 1.5
+        assert actual_when_fires > context_length * 0.50, (
+            "Early fire should still be above agent's 50% threshold"
+        )
+        assert actual_when_fires < context_length, (
+            "Early fire must be well below model limit"
+        )
 
 
 class TestTokenEstimation: