From ce0e189d3e7185d6c8c6af924a1df23e17c6f85c Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 17:11:06 -0700
Subject: [PATCH] fix(xai-oauth): break entitlement-403 credential-refresh
 loop, bump grok-4.3 context to 1M (#26664)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don Piedro's 18-minute hang on grok-4.3 traced to two issues PR #26644
didn't cover:

- _recover_with_credential_pool classifies 403 as FailoverReason.auth
  and calls pool.try_refresh_current().  For xAI OAuth on an
  unsubscribed account, refresh succeeds (mints a new token from the
  same account) but the next API call 403s with the same entitlement
  error.  Result: infinite refresh → retry → 403 loop until Ctrl+C
  (1133s in Don's log).  New _is_entitlement_failure(error_context,
  status_code) detects the subscription-shape body ("do not have an
  active Grok subscription" / "out of available resources" + grok /
  "does not have permission" + grok) and short-circuits recovery so
  _summarize_api_error surfaces PR #26644's friendly hint.

- grok-4.3 resolved to 256k via the grok-4 catch-all in
  DEFAULT_CONTEXT_LENGTHS.  Per docs.x.ai/developers/models/grok-4.3
  the model ships with 1M context.  Add explicit grok-4.3 entry
  before the grok-4 fallback (longest-first substring matching
  ensures grok-4.3 and grok-4.3-latest both land on the new value).

Tests: 8 new (23 total in test_codex_xai_oauth_recovery.py).
E2E verified Don's 100-iteration loop bails out with 0 refresh calls
while genuine auth failures still refresh once and recover.
---
 agent/model_metadata.py                       |   1 +
 run_agent.py                                  |  56 ++++++
 .../test_codex_xai_oauth_recovery.py          | 190 ++++++++++++++++++
 3 files changed, 247 insertions(+)

diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index a10a01e3cc2..41e229416c9 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -213,6 +213,7 @@ DEFAULT_CONTEXT_LENGTHS = {
     "grok-2-vision": 8192,      # grok-2-vision, -1212, -latest
     "grok-4-fast": 2000000,     # grok-4-fast-(non-)reasoning
     "grok-4.20": 2000000,       # grok-4.20-0309-(non-)reasoning, -multi-agent-0309
+    "grok-4.3": 1000000,        # grok-4.3, grok-4.3-latest — 1M context per docs.x.ai
     "grok-4": 256000,           # grok-4, grok-4-0709
     "grok-3": 131072,           # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast
     "grok-2": 131072,           # grok-2, grok-2-1212, grok-2-latest
diff --git a/run_agent.py b/run_agent.py
index 2b20d48ede2..da47ca84e34 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -4966,6 +4966,44 @@ class AIAgent:
         trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
         _save_trajectory_to_file(trajectory, self.model, completed)
 
+    @staticmethod
+    def _is_entitlement_failure(
+        error_context: Optional[Dict[str, Any]],
+        status_code: Optional[int],
+    ) -> bool:
+        """Detect subscription/entitlement 403s that masquerade as auth failures.
+
+        Returns True only when the body text matches a known entitlement
+        shape AND the status is 401, 403, or unknown (None).  Refreshing an
+        OAuth token cannot fix an unsubscribed account, so callers should
+        surface the error instead of looping the credential pool.
+
+        Current matches (case-insensitive, over "message" + "reason"):
+          * xAI OAuth: "do not have an active Grok subscription", or
+            "out of available resources" / "does not have permission" + "grok"
+
+        Extend here for new providers as we discover them (Anthropic's
+        Claude Max OAuth entitlement errors look distinct enough today that
+        the existing 1M-context-beta branch handles them; revisit if other
+        subscription tiers start producing the same loop signature).
+        """
+        if status_code not in (401, 403, None):
+            return False
+        if not isinstance(error_context, dict):
+            return False
+        message = str(error_context.get("message") or "").lower()
+        reason = str(error_context.get("reason") or "").lower()
+        haystack = f"{message} {reason}"
+        if not haystack.strip():
+            return False
+        if "do not have an active grok subscription" in haystack:
+            return True
+        if "out of available resources" in haystack and "grok" in haystack:
+            return True
+        if "does not have permission" in haystack and "grok" in haystack:
+            return True
+        return False
+
     @staticmethod
     def _decorate_xai_entitlement_error(detail: str) -> str:
         """Append a friendly hint when xAI's OAuth surface returns an
@@ -7551,6 +7589,24 @@ class AIAgent:
             return False, True
 
         if effective_reason == FailoverReason.auth:
+            # Subscription/entitlement 403s look like auth failures on the
+            # wire but refresh cannot fix them — the OAuth token is
+            # already valid; the account simply lacks the entitlement
+            # (e.g. xAI OAuth without SuperGrok/X Premium for grok-4.3).
+            # Without this guard, ``try_refresh_current()`` keeps minting
+            # fresh tokens against the same unsubscribed account and the
+            # main agent loop spins re-issuing the same 403 until the
+            # user Ctrl+C's.  Surface the error instead so the friendly
+            # entitlement hint from ``_summarize_api_error`` can land.
+            if self._is_entitlement_failure(error_context, status_code):
+                logger.info(
+                    "Credential %s — entitlement-shaped 403 from %s; "
+                    "skipping pool refresh (account lacks subscription, "
+                    "not a transient auth failure).",
+                    status_code if status_code is not None else "auth",
+                    self.provider or "provider",
+                )
+                return False, has_retried_429
             refreshed = pool.try_refresh_current()
             if refreshed is not None:
                 logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}")
diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py
index 0f3603d2ca7..7c675f22225 100644
--- a/tests/run_agent/test_codex_xai_oauth_recovery.py
+++ b/tests/run_agent/test_codex_xai_oauth_recovery.py
@@ -349,3 +349,193 @@ def test_codex_transport_native_codex_still_replays_reasoning_in_input():
     assert reasoning_items[0]["encrypted_content"] == "enc_blob"
     # Native Codex still asks for encrypted_content back.
     assert "reasoning.encrypted_content" in kwargs.get("include", [])
+
+
+# ---------------------------------------------------------------------------
+# Fix D: entitlement 403 must NOT trigger credential-pool refresh loop
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "message",
+    [
+        # The exact wire text RaidenTyler and Don Piedro captured.
+        "You have either run out of available resources or do not have an "
+        "active Grok subscription. Manage at https://grok.com",
+        # Permission-style variant from the same 403 body (matches because it names "grok").
+        "The caller does not have permission to execute the specified "
+        "operation for grok-4.3",
+    ],
+)
+def test_is_entitlement_failure_matches_real_xai_bodies(message):
+    from run_agent import AIAgent
+
+    assert AIAgent._is_entitlement_failure(
+        {"message": message, "reason": "permission_denied"},
+        403,
+    )
+
+
+def test_is_entitlement_failure_false_for_status_other_than_401_403():
+    """200/429/500 must never be classified as entitlement, even if body matches."""
+    from run_agent import AIAgent
+
+    body = {
+        "message": "do not have an active Grok subscription",
+    }
+    assert not AIAgent._is_entitlement_failure(body, 500)
+    assert not AIAgent._is_entitlement_failure(body, 429)
+    assert not AIAgent._is_entitlement_failure(body, 200)
+
+
+def test_is_entitlement_failure_false_for_unrelated_auth_errors():
+    """A real auth failure (expired token, wrong key) must keep refreshing."""
+    from run_agent import AIAgent
+
+    # Generic Anthropic-style auth failure
+    assert not AIAgent._is_entitlement_failure(
+        {"message": "Invalid API key", "reason": "authentication_error"},
+        401,
+    )
+    # OAuth token expired
+    assert not AIAgent._is_entitlement_failure(
+        {"message": "Token has expired", "reason": "unauthorized"},
+        401,
+    )
+    # Empty context
+    assert not AIAgent._is_entitlement_failure({}, 401)
+    assert not AIAgent._is_entitlement_failure(None, 401)
+
+
+def test_recover_with_credential_pool_skips_refresh_on_entitlement_403():
+    """The recovery path must NOT call pool.try_refresh_current() on entitlement 403.
+
+    Before the fix, an unsubscribed xAI OAuth account would burn the agent
+    loop indefinitely: refresh → 403 → refresh → 403, infinitely.  With
+    the entitlement guard, recovery returns False so the error surfaces
+    normally with the friendly hint from _summarize_api_error.
+    """
+    from run_agent import AIAgent
+    from agent.error_classifier import FailoverReason
+
+    agent = _make_codex_agent()
+
+    # Wire a fake credential pool that records refresh attempts.
+    refresh_calls = {"n": 0}
+
+    class _FakePool:
+        def try_refresh_current(self):
+            refresh_calls["n"] += 1
+            return MagicMock(id="should_not_be_called")
+
+        def mark_exhausted_and_rotate(self, **_kwargs):
+            return None
+
+        def has_available(self):
+            return False
+
+    agent._credential_pool = _FakePool()
+
+    error_context = {
+        "reason": "The caller does not have permission to execute the specified operation",
+        "message": "You have either run out of available resources or do not have an "
+                   "active Grok subscription. Manage at https://grok.com",
+    }
+
+    recovered, _retried_429 = agent._recover_with_credential_pool(
+        status_code=403,
+        has_retried_429=False,
+        classified_reason=FailoverReason.auth,
+        error_context=error_context,
+    )
+
+    assert recovered is False, "Entitlement 403 must surface, not silently recover"
+    assert refresh_calls["n"] == 0, "try_refresh_current must NOT be called on entitlement 403"
+
+
+def test_recover_with_credential_pool_still_refreshes_genuine_auth_failure():
+    """Regression guard: legitimate auth errors must still trigger refresh."""
+    from run_agent import AIAgent
+    from agent.error_classifier import FailoverReason
+
+    agent = _make_codex_agent()
+
+    refresh_calls = {"n": 0}
+
+    class _FakePool:
+        def try_refresh_current(self):
+            refresh_calls["n"] += 1
+            # Return a fake refreshed entry — semantically "refresh worked"
+            entry = MagicMock()
+            entry.id = "entry_refreshed"
+            return entry
+
+        def mark_exhausted_and_rotate(self, **_kwargs):
+            return None
+
+        def has_available(self):
+            return False
+
+    agent._credential_pool = _FakePool()
+    # _swap_credential is called by the recovery path — stub it out
+    agent._swap_credential = MagicMock()
+
+    error_context = {
+        "reason": "authentication_error",
+        "message": "Invalid API key",
+    }
+
+    recovered, _retried_429 = agent._recover_with_credential_pool(
+        status_code=401,
+        has_retried_429=False,
+        classified_reason=FailoverReason.auth,
+        error_context=error_context,
+    )
+
+    assert recovered is True, "Genuine auth failure must still recover via refresh"
+    assert refresh_calls["n"] == 1
+
+
+# ---------------------------------------------------------------------------
+# Fix E: grok-4.3 context length must be 1M, not 256K
+# ---------------------------------------------------------------------------
+
+
+def test_grok_4_3_context_length_is_1m():
+    """grok-4.3 ships with 1M context per docs.x.ai/developers/models/grok-4.3.
+
+    Hermes' substring-match fallback used to return 256k (from the
+    "grok-4" catch-all) which under-reported the model's real capacity.
+    """
+    from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
+
+    # The entry exists with the expected value.
+    assert DEFAULT_CONTEXT_LENGTHS["grok-4.3"] == 1_000_000
+
+    # And longest-first substring matching (re-implemented inline here via
+    # max(..., key=len)) resolves both slugs to grok-4.3, NOT the grok-4 catch-all.
+    for slug in ("grok-4.3", "grok-4.3-latest"):
+        matched_key = max(
+            (k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()),
+            key=len,
+        )
+        assert matched_key == "grok-4.3", (
+            f"Expected longest-first match to land on grok-4.3 for {slug}, "
+            f"got {matched_key}"
+        )
+        assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 1_000_000
+
+
+def test_grok_4_still_resolves_to_256k():
+    """Regression guard: grok-4 (non-.3) must still resolve to 256k."""
+    from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
+
+    for slug in ("grok-4", "grok-4-0709"):
+        matched_key = max(
+            (k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()),
+            key=len,
+        )
+        # grok-4-0709 contains "grok-4" but not "grok-4.3"; matched key
+        # must be "grok-4" (or a more specific variant family if one is
+        # ever added).  The 256k contract must hold.
+        assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 256_000