diff --git a/run_agent.py b/run_agent.py
index 4ee4de51b2..48092501f6 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -7380,6 +7380,59 @@ class AIAgent:
                     # compress history and retry, not abort immediately.
                     status_code = getattr(api_error, "status_code", None)
 
+                    # ── Anthropic long-context tier gate ──────────────────
+                    # Anthropic returns HTTP 429 "Extra usage is required for
+                    # long context requests" when a Claude Max (or similar)
+                    # subscription doesn't include the 1M-context tier.  This
+                    # is NOT a transient rate limit — retrying or switching
+                    # credentials won't help.  Reduce context to 200k (the
+                    # standard tier) and compress.
+                    _is_long_context_tier_error = (
+                        status_code == 429
+                        and "extra usage" in error_msg
+                        and "long context" in error_msg
+                    )
+                    if _is_long_context_tier_error:
+                        _reduced_ctx = 200000
+                        compressor = self.context_compressor
+                        old_ctx = compressor.context_length
+                        if old_ctx > _reduced_ctx:
+                            compressor.context_length = _reduced_ctx
+                            compressor.threshold_tokens = int(
+                                _reduced_ctx * compressor.threshold_percent
+                            )
+                            compressor._context_probed = True
+                            # Don't persist — this is a subscription-tier
+                            # limitation, not a model capability.  If the user
+                            # later enables extra usage the 1M limit should
+                            # come back automatically.
+                            compressor._context_probe_persistable = False
+                            self._vprint(
+                                f"{self.log_prefix}⚠️  Anthropic long-context tier "
+                                f"requires extra usage — reducing context: "
+                                f"{old_ctx:,} → {_reduced_ctx:,} tokens",
+                                force=True,
+                            )
+
+                        compression_attempts += 1
+                        if compression_attempts <= max_compression_attempts:
+                            original_len = len(messages)
+                            messages, active_system_prompt = self._compress_context(
+                                messages, system_message,
+                                approx_tokens=approx_tokens,
+                                task_id=effective_task_id,
+                            )
+                            if len(messages) < original_len or old_ctx > _reduced_ctx:
+                                self._emit_status(
+                                    f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
+                                    f"(was {old_ctx:,}), retrying..."
+                                )
+                                time.sleep(2)
+                                restart_with_compressed_messages = True
+                                break
+                        # Fall through to normal error handling if compression
+                        # is exhausted or didn't help.
+
                     # Eager fallback for rate-limit errors (429 or quota exhaustion).
                     # When a fallback model is configured, switch immediately instead
                     # of burning through retries with exponential backoff -- the
diff --git a/tests/test_long_context_tier_429.py b/tests/test_long_context_tier_429.py
new file mode 100644
index 0000000000..ac2fcf3113
--- /dev/null
+++ b/tests/test_long_context_tier_429.py
@@ -0,0 +1,163 @@
+"""Tests for Anthropic long-context tier 429 handling.
+
+When Claude Max users without "extra usage" hit the 1M context tier,
+Anthropic returns HTTP 429 "Extra usage is required for long context
+requests."  This is NOT a transient rate limit — the agent should
+reduce context_length to 200k and compress instead of retrying.
+"""
+
+import pytest
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+
+# ---------------------------------------------------------------------------
+# Detection logic
+# ---------------------------------------------------------------------------
+
+
+class TestLongContextTierDetection:
+    """Exercise the 429 long-context-tier heuristic on sample messages."""
+
+    @staticmethod
+    def _is_long_context_tier_error(status_code, error_msg):
+        # Mirrors the agent's substring check; the text is lowercased first.
+        lowered = error_msg.lower()
+        if status_code != 429:
+            return False
+        phrases = ("extra usage", "long context")
+        return all(phrase in lowered for phrase in phrases)
+
+    def test_matches_anthropic_error(self):
+        """The wording quoted from Anthropic's 429 is recognised."""
+        message = "Extra usage is required for long context requests."
+        detected = self._is_long_context_tier_error(429, message)
+        assert detected
+
+    def test_matches_lowercase(self):
+        """An all-lowercase message is recognised as well."""
+        message = "extra usage is required for long context requests."
+        detected = self._is_long_context_tier_error(429, message)
+        assert detected
+
+    def test_rejects_normal_429(self):
+        """An ordinary rate-limit 429 does not match."""
+        message = "Rate limit exceeded. Please retry after 30 seconds."
+        detected = self._is_long_context_tier_error(429, message)
+        assert not detected
+
+    def test_rejects_wrong_status(self):
+        """The right wording with a non-429 status does not match."""
+        message = "Extra usage is required for long context requests."
+        detected = self._is_long_context_tier_error(400, message)
+        assert not detected
+
+    def test_rejects_partial_match(self):
+        """A message carrying only one of the two phrases does not match."""
+        one_phrase_only = (
+            "extra usage required",
+            "long context requests not supported",
+        )
+        for message in one_phrase_only:
+            assert not self._is_long_context_tier_error(429, message)
+
+
+# ---------------------------------------------------------------------------
+# Context reduction
+# ---------------------------------------------------------------------------
+
+
+class TestContextReduction:
+    """Replay the handler's reduction step on a stand-in compressor: a limit
+    above 200k drops to 200k, a limit at or below 200k is left untouched."""
+
+    def _make_compressor(self, context_length=1_000_000, threshold_percent=0.5):
+        # Starts persistable so a test can observe the flag being cleared.
+        return SimpleNamespace(
+            context_length=context_length,
+            threshold_percent=threshold_percent,
+            threshold_tokens=int(context_length * threshold_percent),
+            _context_probed=False,
+            _context_probe_persistable=True,
+        )
+
+    def test_reduces_1m_to_200k(self):
+        """A 1M limit drops to 200k, is marked probed, and is not persisted."""
+        comp = self._make_compressor(1_000_000)
+        reduced_ctx = 200_000
+        if comp.context_length > reduced_ctx:
+            comp.context_length = reduced_ctx
+            comp.threshold_tokens = int(reduced_ctx * comp.threshold_percent)
+            comp._context_probed = True
+            comp._context_probe_persistable = False
+
+        assert comp.context_length == 200_000
+        assert comp.threshold_tokens == 100_000
+        assert comp._context_probed is True
+        # Must NOT persist — subscription tier, not model capability
+        assert comp._context_probe_persistable is False
+
+    def test_no_reduction_when_already_200k(self):
+        """A limit equal to 200k fails the strict '>' guard and is kept."""
+        comp = self._make_compressor(200_000)
+        reduced_ctx = 200_000
+        original = comp.context_length
+        if comp.context_length > reduced_ctx:
+            comp.context_length = reduced_ctx
+
+        assert comp.context_length == original  # unchanged
+
+    def test_no_reduction_when_below_200k(self):
+        """A limit below 200k is never raised to 200k."""
+        comp = self._make_compressor(128_000)
+        reduced_ctx = 200_000
+        original = comp.context_length
+        if comp.context_length > reduced_ctx:
+            comp.context_length = reduced_ctx
+
+        assert comp.context_length == original  # unchanged
+
+
+# ---------------------------------------------------------------------------
+# Integration: agent error handler path
+# ---------------------------------------------------------------------------
+
+
+class TestAgentErrorPath:
+    """Replay the handler's predicates inline to pin their ordering: the
+    long-context check claims its 429 before the eager rate-limit fallback."""
+
+    def test_long_context_429_not_treated_as_rate_limit(self):
+        """The tier-gate wording on a 429 satisfies the long-context
+        predicate, which the handler evaluates ahead of the rate-limit one."""
+        status_code = 429
+        error_msg = "extra usage is required for long context requests."
+
+        # Same three conditions as the handler, collected as a conjunction.
+        conditions = (
+            status_code == 429,
+            "extra usage" in error_msg,
+            "long context" in error_msg,
+        )
+        assert all(conditions)
+
+        # In the real code a successful compression then `break`s out of
+        # the retry loop, so the generic rate-limit path is not reached.
+
+    def test_normal_429_still_treated_as_rate_limit(self):
+        """A plain 429 misses the long-context check yet is rate limited."""
+        status_code = 429
+        error_msg = "rate limit exceeded"
+
+        tier_phrases = ("extra usage", "long context")
+        matches_tier_gate = status_code == 429 and all(
+            phrase in error_msg for phrase in tier_phrases
+        )
+        assert not matches_tier_gate
+
+        # Either signal alone is enough for the rate-limit predicate below.
+        rate_limit_signals = (
+            status_code == 429,
+            "rate limit" in error_msg,
+        )
+        assert any(rate_limit_signals)