fix(xai-oauth): break entitlement-403 credential-refresh loop, bump grok-4.3 context to 1M (#26664)

Don Piedro's 18-minute hang on grok-4.3 was traced to two issues that
PR #26644 didn't cover:

- _recover_with_credential_pool classifies 403 as FailoverReason.auth
  and calls pool.try_refresh_current().  For xAI OAuth on an
  unsubscribed account, refresh succeeds (mints a new token from the
  same account) but the next API call 403s with the same entitlement
  error.  Result: infinite refresh → retry → 403 loop until Ctrl+C
  (1133s in Don's log).  New _is_entitlement_failure(error_context,
  status_code) detects the subscription-shape body ("do not have an
  active Grok subscription" / "out of available resources" + grok /
  "does not have permission" + grok) and short-circuits recovery so
  _summarize_api_error surfaces PR #26644's friendly hint.

- grok-4.3 resolved to 256k via the grok-4 catch-all in
  DEFAULT_CONTEXT_LENGTHS.  Per docs.x.ai/developers/models/grok-4.3
  the model ships with 1M context.  Add explicit grok-4.3 entry
  before the grok-4 fallback (longest-first substring matching
  ensures grok-4.3 and grok-4.3-latest both land on the new value).

Tests: 8 new (23 total in test_codex_xai_oauth_recovery.py).
E2E verified Don's 100-iteration loop bails out with 0 refresh calls
while genuine auth failures still refresh once and recover.
This commit is contained in:
Teknium 2026-05-15 17:11:06 -07:00 committed by GitHub
parent dc4cde278b
commit ce0e189d3e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 247 additions and 0 deletions

View file

@ -349,3 +349,193 @@ def test_codex_transport_native_codex_still_replays_reasoning_in_input():
assert reasoning_items[0]["encrypted_content"] == "enc_blob"
# Native Codex still asks for encrypted_content back.
assert "reasoning.encrypted_content" in kwargs.get("include", [])
# ---------------------------------------------------------------------------
# Fix D: entitlement 403 must NOT trigger credential-pool refresh loop
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    "message",
    [
        # Verbatim wire text captured by RaidenTyler and Don Piedro.
        "You have either run out of available resources or do not have an "
        "active Grok subscription. Manage at https://grok.com",
        # Permission-denied wording taken from the same 403 body.
        "The caller does not have permission to execute the specified "
        "operation for grok-4.3",
    ],
)
def test_is_entitlement_failure_matches_real_xai_bodies(message):
    """Each captured xAI 403 body wording must be flagged as an entitlement failure."""
    from run_agent import AIAgent

    error_context = {"message": message, "reason": "permission_denied"}
    flagged = AIAgent._is_entitlement_failure(error_context, 403)
    assert flagged
def test_is_entitlement_failure_false_for_status_other_than_401_403():
    """A subscription-shaped body is not an entitlement failure for 500/429/200."""
    from run_agent import AIAgent

    subscription_body = {
        "message": "do not have an active Grok subscription",
    }
    # The body text would match; only the status code should rule it out.
    for status in (500, 429, 200):
        assert not AIAgent._is_entitlement_failure(subscription_body, status)
def test_is_entitlement_failure_false_for_unrelated_auth_errors():
    """Ordinary 401 auth failures and empty contexts are not entitlement failures.

    These cases (expired token, wrong key) must stay eligible for the
    credential-refresh path.
    """
    from run_agent import AIAgent

    is_entitlement = AIAgent._is_entitlement_failure
    non_entitlement_contexts = (
        # Generic Anthropic-style auth failure
        {"message": "Invalid API key", "reason": "authentication_error"},
        # OAuth token expired
        {"message": "Token has expired", "reason": "unauthorized"},
        # Empty context, as an empty dict and as None
        {},
        None,
    )
    for context in non_entitlement_contexts:
        assert not is_entitlement(context, 401)
def test_recover_with_credential_pool_skips_refresh_on_entitlement_403():
    """Entitlement 403s must bypass ``pool.try_refresh_current()`` entirely.

    Prior to the fix, an unsubscribed xAI OAuth account kept the agent loop
    spinning forever: every refresh was followed by another 403, which
    triggered another refresh.  With the entitlement guard in place the
    recovery call returns False, so the error surfaces normally together
    with the friendly hint from _summarize_api_error.
    """
    from run_agent import AIAgent
    from agent.error_classifier import FailoverReason

    # One element is appended per refresh attempt made on the fake pool.
    refresh_attempts = []

    class _RecordingPool:
        """Fake credential pool that records refresh attempts."""

        def try_refresh_current(self):
            refresh_attempts.append(1)
            return MagicMock(id="should_not_be_called")

        def mark_exhausted_and_rotate(self, **_kwargs):
            return None

        def has_available(self):
            return False

    agent = _make_codex_agent()
    agent._credential_pool = _RecordingPool()

    entitlement_context = {
        "reason": "The caller does not have permission to execute the specified operation",
        "message": "You have either run out of available resources or do not have an "
        "active Grok subscription. Manage at https://grok.com",
    }

    outcome = agent._recover_with_credential_pool(
        status_code=403,
        has_retried_429=False,
        classified_reason=FailoverReason.auth,
        error_context=entitlement_context,
    )
    recovered, _retried_429 = outcome

    assert recovered is False, "Entitlement 403 must surface, not silently recover"
    assert len(refresh_attempts) == 0, "try_refresh_current must NOT be called on entitlement 403"
def test_recover_with_credential_pool_still_refreshes_genuine_auth_failure():
    """Regression guard: a legitimate 401 auth error still refreshes exactly once."""
    from run_agent import AIAgent
    from agent.error_classifier import FailoverReason

    # One element is appended per refresh attempt made on the fake pool.
    refresh_attempts = []

    class _RefreshingPool:
        """Fake credential pool whose refresh always yields a new entry."""

        def try_refresh_current(self):
            refresh_attempts.append(1)
            # A fake refreshed entry, i.e. semantically "refresh worked".
            return MagicMock(id="entry_refreshed")

        def mark_exhausted_and_rotate(self, **_kwargs):
            return None

        def has_available(self):
            return False

    agent = _make_codex_agent()
    agent._credential_pool = _RefreshingPool()
    # The recovery path calls _swap_credential, so replace it with a stub.
    agent._swap_credential = MagicMock()

    auth_context = {
        "reason": "authentication_error",
        "message": "Invalid API key",
    }

    outcome = agent._recover_with_credential_pool(
        status_code=401,
        has_retried_429=False,
        classified_reason=FailoverReason.auth,
        error_context=auth_context,
    )
    recovered, _retried_429 = outcome

    assert recovered is True, "Genuine auth failure must still recover via refresh"
    assert len(refresh_attempts) == 1
# ---------------------------------------------------------------------------
# Fix E: grok-4.3 context length must be 1M, not 256K
# ---------------------------------------------------------------------------
def test_grok_4_3_context_length_is_1m():
    """grok-4.3 must resolve to a 1M context window.

    Per docs.x.ai/developers/models/grok-4.3 the model ships with 1M
    context.  Hermes' substring-match fallback previously returned 256k
    via the "grok-4" catch-all, under-reporting the real capacity.
    """
    from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS

    expected_tokens = 1_000_000

    # An explicit entry exists and carries the expected value.
    assert DEFAULT_CONTEXT_LENGTHS["grok-4.3"] == expected_tokens

    # Longest-key substring matching must pick grok-4.3 for both slugs
    # rather than falling through to the grok-4 catch-all.
    for slug in ("grok-4.3", "grok-4.3-latest"):
        lowered = slug.lower()
        candidates = [key for key in DEFAULT_CONTEXT_LENGTHS if key in lowered]
        matched_key = max(candidates, key=len)
        assert matched_key == "grok-4.3", (
            f"Expected longest-first match to land on grok-4.3 for {slug}, "
            f"got {matched_key}"
        )
        assert DEFAULT_CONTEXT_LENGTHS[matched_key] == expected_tokens
def test_grok_4_still_resolves_to_256k():
    """Regression guard: plain grok-4 slugs (not grok-4.3) keep the 256k value."""
    from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS

    for slug in ("grok-4", "grok-4-0709"):
        lowered = slug.lower()
        candidates = [key for key in DEFAULT_CONTEXT_LENGTHS if key in lowered]
        matched_key = max(candidates, key=len)
        # "grok-4-0709" contains "grok-4" but not "grok-4.3", so the match
        # is "grok-4" (or a more specific variant family, should one ever
        # be added).  Either way the 256k contract has to hold.
        assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 256_000