From ce0e189d3e7185d6c8c6af924a1df23e17c6f85c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 15 May 2026 17:11:06 -0700 Subject: [PATCH] fix(xai-oauth): break entitlement-403 credential-refresh loop, bump grok-4.3 context to 1M (#26664) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don Piedro's 18-minute hang on grok-4.3 was traced to two issues PR #26644 didn't cover: - _recover_with_credential_pool classifies 403 as FailoverReason.auth and calls pool.try_refresh_current(). For xAI OAuth on an unsubscribed account, refresh succeeds (mints a new token from the same account) but the next API call 403s with the same entitlement error. Result: infinite refresh → retry → 403 loop until Ctrl+C (1133s in Don's log). New _is_entitlement_failure(error_context, status_code) detects the subscription-shaped body ("do not have an active Grok subscription" / "out of available resources" + grok / "does not have permission" + grok) and short-circuits recovery so _summarize_api_error surfaces PR #26644's friendly hint. - grok-4.3 resolved to 256k via the grok-4 catch-all in DEFAULT_CONTEXT_LENGTHS. Per docs.x.ai/developers/models/grok-4.3 the model ships with 1M context. Add an explicit grok-4.3 entry before the grok-4 fallback (longest-first substring matching ensures grok-4.3 and grok-4.3-latest both land on the new value). Tests: 8 new (23 total in test_codex_xai_oauth_recovery.py). E2E verified that Don's 100-iteration loop bails out with 0 refresh calls, while genuine auth failures still refresh once and recover. 
--- agent/model_metadata.py | 1 + run_agent.py | 56 ++++++ .../test_codex_xai_oauth_recovery.py | 190 ++++++++++++++++++ 3 files changed, 247 insertions(+) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index a10a01e3cc2..41e229416c9 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -213,6 +213,7 @@ DEFAULT_CONTEXT_LENGTHS = { "grok-2-vision": 8192, # grok-2-vision, -1212, -latest "grok-4-fast": 2000000, # grok-4-fast-(non-)reasoning "grok-4.20": 2000000, # grok-4.20-0309-(non-)reasoning, -multi-agent-0309 + "grok-4.3": 1000000, # grok-4.3, grok-4.3-latest — 1M context per docs.x.ai "grok-4": 256000, # grok-4, grok-4-0709 "grok-3": 131072, # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast "grok-2": 131072, # grok-2, grok-2-1212, grok-2-latest diff --git a/run_agent.py b/run_agent.py index 2b20d48ede2..da47ca84e34 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4966,6 +4966,44 @@ class AIAgent: trajectory = self._convert_to_trajectory_format(messages, user_query, completed) _save_trajectory_to_file(trajectory, self.model, completed) + @staticmethod + def _is_entitlement_failure( + error_context: Optional[Dict[str, Any]], + status_code: Optional[int], + ) -> bool: + """Detect subscription/entitlement 403s that masquerade as auth failures. + + Returned True only when the body text matches a known entitlement + shape AND the status is 401/403. Refreshing an OAuth token cannot + fix an unsubscribed account, so callers should surface the error + instead of looping the credential pool. + + Current matches: + * xAI OAuth: "do not have an active Grok subscription" / + "out of available resources" / "does not have permission" + "grok" + + Extend here for new providers as we discover them (Anthropic's + Claude Max OAuth entitlement errors look distinct enough today that + the existing 1M-context-beta branch handles them; revisit if other + subscription tiers start producing the same loop signature). 
+ """ + if status_code not in (401, 403, None): + return False + if not isinstance(error_context, dict): + return False + message = str(error_context.get("message") or "").lower() + reason = str(error_context.get("reason") or "").lower() + haystack = f"{message} {reason}" + if not haystack.strip(): + return False + if "do not have an active grok subscription" in haystack: + return True + if "out of available resources" in haystack and "grok" in haystack: + return True + if "does not have permission" in haystack and "grok" in haystack: + return True + return False + @staticmethod def _decorate_xai_entitlement_error(detail: str) -> str: """Append a friendly hint when xAI's OAuth surface returns an @@ -7551,6 +7589,24 @@ class AIAgent: return False, True if effective_reason == FailoverReason.auth: + # Subscription/entitlement 403s look like auth failures on the + # wire but refresh cannot fix them — the OAuth token is + # already valid; the account simply lacks the entitlement + # (e.g. xAI OAuth without SuperGrok/X Premium for grok-4.3). + # Without this guard, ``try_refresh_current()`` keeps minting + # fresh tokens against the same unsubscribed account and the + # main agent loop spins re-issuing the same 403 until the + # user Ctrl+C's. Surface the error instead so the friendly + # entitlement hint from ``_summarize_api_error`` can land. 
+ if self._is_entitlement_failure(error_context, status_code): + logger.info( + "Credential %s — entitlement-shaped 403 from %s; " + "skipping pool refresh (account lacks subscription, " + "not a transient auth failure).", + status_code if status_code is not None else "auth", + self.provider or "provider", + ) + return False, has_retried_429 refreshed = pool.try_refresh_current() if refreshed is not None: logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}") diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py index 0f3603d2ca7..7c675f22225 100644 --- a/tests/run_agent/test_codex_xai_oauth_recovery.py +++ b/tests/run_agent/test_codex_xai_oauth_recovery.py @@ -349,3 +349,193 @@ def test_codex_transport_native_codex_still_replays_reasoning_in_input(): assert reasoning_items[0]["encrypted_content"] == "enc_blob" # Native Codex still asks for encrypted_content back. assert "reasoning.encrypted_content" in kwargs.get("include", []) + + +# --------------------------------------------------------------------------- +# Fix D: entitlement 403 must NOT trigger credential-pool refresh loop +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "message", + [ + # The exact wire text RaidenTyler and Don Piedro captured. + "You have either run out of available resources or do not have an " + "active Grok subscription. Manage at https://grok.com", + # Permission-style variant from the same 403 body. 
+ "The caller does not have permission to execute the specified " + "operation for grok-4.3", + ], +) +def test_is_entitlement_failure_matches_real_xai_bodies(message): + from run_agent import AIAgent + + assert AIAgent._is_entitlement_failure( + {"message": message, "reason": "permission_denied"}, + 403, + ) + + +def test_is_entitlement_failure_false_for_status_other_than_401_403(): + """200/429/500 must never be classified as entitlement, even if body matches.""" + from run_agent import AIAgent + + body = { + "message": "do not have an active Grok subscription", + } + assert not AIAgent._is_entitlement_failure(body, 500) + assert not AIAgent._is_entitlement_failure(body, 429) + assert not AIAgent._is_entitlement_failure(body, 200) + + +def test_is_entitlement_failure_false_for_unrelated_auth_errors(): + """A real auth failure (expired token, wrong key) must keep refreshing.""" + from run_agent import AIAgent + + # Generic Anthropic-style auth failure + assert not AIAgent._is_entitlement_failure( + {"message": "Invalid API key", "reason": "authentication_error"}, + 401, + ) + # OAuth token expired + assert not AIAgent._is_entitlement_failure( + {"message": "Token has expired", "reason": "unauthorized"}, + 401, + ) + # Empty context + assert not AIAgent._is_entitlement_failure({}, 401) + assert not AIAgent._is_entitlement_failure(None, 401) + + +def test_recover_with_credential_pool_skips_refresh_on_entitlement_403(): + """The recovery path must NOT call pool.try_refresh_current() on entitlement 403. + + Before the fix, an unsubscribed xAI OAuth account would burn the agent + loop indefinitely: refresh → 403 → refresh → 403, infinitely. With + the entitlement guard, recovery returns False so the error surfaces + normally with the friendly hint from _summarize_api_error. + """ + from run_agent import AIAgent + from agent.error_classifier import FailoverReason + + agent = _make_codex_agent() + + # Wire a fake credential pool that records refresh attempts. 
+ refresh_calls = {"n": 0} + + class _FakePool: + def try_refresh_current(self): + refresh_calls["n"] += 1 + return MagicMock(id="should_not_be_called") + + def mark_exhausted_and_rotate(self, **_kwargs): + return None + + def has_available(self): + return False + + agent._credential_pool = _FakePool() + + error_context = { + "reason": "The caller does not have permission to execute the specified operation", + "message": "You have either run out of available resources or do not have an " + "active Grok subscription. Manage at https://grok.com", + } + + recovered, _retried_429 = agent._recover_with_credential_pool( + status_code=403, + has_retried_429=False, + classified_reason=FailoverReason.auth, + error_context=error_context, + ) + + assert recovered is False, "Entitlement 403 must surface, not silently recover" + assert refresh_calls["n"] == 0, "try_refresh_current must NOT be called on entitlement 403" + + +def test_recover_with_credential_pool_still_refreshes_genuine_auth_failure(): + """Regression guard: legitimate auth errors must still trigger refresh.""" + from run_agent import AIAgent + from agent.error_classifier import FailoverReason + + agent = _make_codex_agent() + + refresh_calls = {"n": 0} + + class _FakePool: + def try_refresh_current(self): + refresh_calls["n"] += 1 + # Return a fake refreshed entry — semantically "refresh worked" + entry = MagicMock() + entry.id = "entry_refreshed" + return entry + + def mark_exhausted_and_rotate(self, **_kwargs): + return None + + def has_available(self): + return False + + agent._credential_pool = _FakePool() + # _swap_credential is called by the recovery path — stub it out + agent._swap_credential = MagicMock() + + error_context = { + "reason": "authentication_error", + "message": "Invalid API key", + } + + recovered, _retried_429 = agent._recover_with_credential_pool( + status_code=401, + has_retried_429=False, + classified_reason=FailoverReason.auth, + error_context=error_context, + ) + + assert recovered is 
True, "Genuine auth failure must still recover via refresh" + assert refresh_calls["n"] == 1 + + +# --------------------------------------------------------------------------- +# Fix E: grok-4.3 context length must be 1M, not 256K +# --------------------------------------------------------------------------- + + +def test_grok_4_3_context_length_is_1m(): + """grok-4.3 ships with 1M context per docs.x.ai/developers/models/grok-4.3. + + Hermes' substring-match fallback used to return 256k (from the + "grok-4" catch-all) which under-reported the model's real capacity. + """ + from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + + # The entry exists with the expected value. + assert DEFAULT_CONTEXT_LENGTHS["grok-4.3"] == 1_000_000 + + # And longest-first substring matching resolves grok-4.3 and + # grok-4.3-latest to the new value, NOT the grok-4 catch-all. + for slug in ("grok-4.3", "grok-4.3-latest"): + matched_key = max( + (k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()), + key=len, + ) + assert matched_key == "grok-4.3", ( + f"Expected longest-first match to land on grok-4.3 for {slug}, " + f"got {matched_key}" + ) + assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 1_000_000 + + +def test_grok_4_still_resolves_to_256k(): + """Regression guard: grok-4 (non-.3) must still resolve to 256k.""" + from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + + for slug in ("grok-4", "grok-4-0709"): + matched_key = max( + (k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()), + key=len, + ) + # grok-4-0709 contains "grok-4" but not "grok-4.3"; matched key + # must be "grok-4" (or a more specific variant family if one is + # ever added). The 256k contract must hold. + assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 256_000