mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(xai-oauth): break entitlement-403 credential-refresh loop, bump grok-4.3 context to 1M (#26664)
Don Piedro's 18-minute hang on grok-4.3 traced to two issues that PR #26644 didn't cover:

- _recover_with_credential_pool classifies 403 as FailoverReason.auth and calls pool.try_refresh_current(). For xAI OAuth on an unsubscribed account, the refresh succeeds (it mints a new token from the same account), but the next API call 403s with the same entitlement error. Result: an infinite refresh → retry → 403 loop until Ctrl+C (1133s in Don's log). The new _is_entitlement_failure(error_context, status_code) detects the subscription-shaped body ("do not have an active Grok subscription", or "out of available resources" / "does not have permission" together with "grok") and short-circuits recovery so that _summarize_api_error surfaces PR #26644's friendly hint.

- grok-4.3 resolved to 256k via the grok-4 catch-all in DEFAULT_CONTEXT_LENGTHS. Per docs.x.ai/developers/models/grok-4.3, the model ships with 1M context. Add an explicit grok-4.3 entry before the grok-4 fallback (longest-first substring matching ensures grok-4.3 and grok-4.3-latest both land on the new value).

Tests: 8 new (23 total in test_codex_xai_oauth_recovery.py). E2E verified that Don's 100-iteration loop bails out with 0 refresh calls, while genuine auth failures still refresh once and recover.
This commit is contained in:
parent
dc4cde278b
commit
ce0e189d3e
3 changed files with 247 additions and 0 deletions
|
|
@ -213,6 +213,7 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||||
"grok-2-vision": 8192, # grok-2-vision, -1212, -latest
|
"grok-2-vision": 8192, # grok-2-vision, -1212, -latest
|
||||||
"grok-4-fast": 2000000, # grok-4-fast-(non-)reasoning
|
"grok-4-fast": 2000000, # grok-4-fast-(non-)reasoning
|
||||||
"grok-4.20": 2000000, # grok-4.20-0309-(non-)reasoning, -multi-agent-0309
|
"grok-4.20": 2000000, # grok-4.20-0309-(non-)reasoning, -multi-agent-0309
|
||||||
|
"grok-4.3": 1000000, # grok-4.3, grok-4.3-latest — 1M context per docs.x.ai
|
||||||
"grok-4": 256000, # grok-4, grok-4-0709
|
"grok-4": 256000, # grok-4, grok-4-0709
|
||||||
"grok-3": 131072, # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast
|
"grok-3": 131072, # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast
|
||||||
"grok-2": 131072, # grok-2, grok-2-1212, grok-2-latest
|
"grok-2": 131072, # grok-2, grok-2-1212, grok-2-latest
|
||||||
|
|
|
||||||
56
run_agent.py
56
run_agent.py
|
|
@ -4966,6 +4966,44 @@ class AIAgent:
|
||||||
trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
|
trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
|
||||||
_save_trajectory_to_file(trajectory, self.model, completed)
|
_save_trajectory_to_file(trajectory, self.model, completed)
|
||||||
|
|
||||||
|
@staticmethod
def _is_entitlement_failure(
    error_context: Optional[Dict[str, Any]],
    status_code: Optional[int],
) -> bool:
    """Detect subscription/entitlement 403s that masquerade as auth failures.

    Returns True only when the body text matches a known entitlement
    shape AND the status is 401, 403, or unknown (``None``).  Refreshing
    an OAuth token cannot fix an unsubscribed account, so callers should
    surface the error instead of looping the credential pool.

    Args:
        error_context: Parsed error payload.  Only its ``"message"`` and
            ``"reason"`` entries are inspected; anything that is not a
            ``dict`` is treated as "no match".
        status_code: HTTP status of the failed call.  Any value other
            than 401, 403 or ``None`` is rejected before the body is
            examined.

    Returns:
        True when the combined, lower-cased message/reason text matches
        one of the known entitlement phrases, otherwise False.

    Current matches:
      * xAI OAuth: "do not have an active Grok subscription" (on its
        own), or "out of available resources" / "does not have
        permission" when "grok" also appears in the text.

    Extend here for new providers as we discover them (Anthropic's
    Claude Max OAuth entitlement errors look distinct enough today that
    the existing 1M-context-beta branch handles them; revisit if other
    subscription tiers start producing the same loop signature).
    """
    if status_code not in (401, 403, None):
        return False
    if not isinstance(error_context, dict):
        return False

    # Match case-insensitively across both fields; either may be missing
    # or a non-string, hence the ``or ""`` fallback and ``str()`` coercion.
    message = str(error_context.get("message") or "").lower()
    reason = str(error_context.get("reason") or "").lower()
    haystack = f"{message} {reason}"
    if not haystack.strip():
        return False

    # This phrase names the Grok subscription itself, so it is sufficient
    # on its own.
    if "do not have an active grok subscription" in haystack:
        return True

    # The remaining phrases are generic wording, so they only count when
    # the text also mentions grok.
    if "grok" not in haystack:
        return False
    generic_phrases = ("out of available resources", "does not have permission")
    return any(phrase in haystack for phrase in generic_phrases)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _decorate_xai_entitlement_error(detail: str) -> str:
|
def _decorate_xai_entitlement_error(detail: str) -> str:
|
||||||
"""Append a friendly hint when xAI's OAuth surface returns an
|
"""Append a friendly hint when xAI's OAuth surface returns an
|
||||||
|
|
@ -7551,6 +7589,24 @@ class AIAgent:
|
||||||
return False, True
|
return False, True
|
||||||
|
|
||||||
if effective_reason == FailoverReason.auth:
|
if effective_reason == FailoverReason.auth:
|
||||||
|
# Subscription/entitlement 403s look like auth failures on the
|
||||||
|
# wire but refresh cannot fix them — the OAuth token is
|
||||||
|
# already valid; the account simply lacks the entitlement
|
||||||
|
# (e.g. xAI OAuth without SuperGrok/X Premium for grok-4.3).
|
||||||
|
# Without this guard, ``try_refresh_current()`` keeps minting
|
||||||
|
# fresh tokens against the same unsubscribed account and the
|
||||||
|
# main agent loop spins re-issuing the same 403 until the
|
||||||
|
# user Ctrl+C's. Surface the error instead so the friendly
|
||||||
|
# entitlement hint from ``_summarize_api_error`` can land.
|
||||||
|
if self._is_entitlement_failure(error_context, status_code):
|
||||||
|
logger.info(
|
||||||
|
"Credential %s — entitlement-shaped 403 from %s; "
|
||||||
|
"skipping pool refresh (account lacks subscription, "
|
||||||
|
"not a transient auth failure).",
|
||||||
|
status_code if status_code is not None else "auth",
|
||||||
|
self.provider or "provider",
|
||||||
|
)
|
||||||
|
return False, has_retried_429
|
||||||
refreshed = pool.try_refresh_current()
|
refreshed = pool.try_refresh_current()
|
||||||
if refreshed is not None:
|
if refreshed is not None:
|
||||||
logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}")
|
logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}")
|
||||||
|
|
|
||||||
|
|
@ -349,3 +349,193 @@ def test_codex_transport_native_codex_still_replays_reasoning_in_input():
|
||||||
assert reasoning_items[0]["encrypted_content"] == "enc_blob"
|
assert reasoning_items[0]["encrypted_content"] == "enc_blob"
|
||||||
# Native Codex still asks for encrypted_content back.
|
# Native Codex still asks for encrypted_content back.
|
||||||
assert "reasoning.encrypted_content" in kwargs.get("include", [])
|
assert "reasoning.encrypted_content" in kwargs.get("include", [])
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fix D: entitlement 403 must NOT trigger credential-pool refresh loop
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "message",
    [
        # Verbatim wire text captured by RaidenTyler and Don Piedro.
        (
            "You have either run out of available resources or do not have an "
            "active Grok subscription. Manage at https://grok.com"
        ),
        # Permission-flavoured wording seen in the same 403 body.
        (
            "The caller does not have permission to execute the specified "
            "operation for grok-4.3"
        ),
    ],
)
def test_is_entitlement_failure_matches_real_xai_bodies(message):
    """Each captured xAI 403 body must be classified as an entitlement failure."""
    from run_agent import AIAgent

    error_context = {"message": message, "reason": "permission_denied"}
    assert AIAgent._is_entitlement_failure(error_context, 403)
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_entitlement_failure_false_for_status_other_than_401_403():
    """A matching body is not an entitlement failure when the status is 500, 429 or 200."""
    from run_agent import AIAgent

    body = {"message": "do not have an active Grok subscription"}
    # Same matching body each time; only the status code varies.
    for status in (500, 429, 200):
        assert not AIAgent._is_entitlement_failure(body, status)
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_entitlement_failure_false_for_unrelated_auth_errors():
    """Ordinary auth failures (expired token, wrong key) must not match, so refresh still runs."""
    from run_agent import AIAgent

    non_entitlement_contexts = (
        # Generic Anthropic-style auth failure.
        {"message": "Invalid API key", "reason": "authentication_error"},
        # Expired OAuth token.
        {"message": "Token has expired", "reason": "unauthorized"},
        # Empty and missing contexts.
        {},
        None,
    )
    for context in non_entitlement_contexts:
        assert not AIAgent._is_entitlement_failure(context, 401)
|
||||||
|
|
||||||
|
|
||||||
|
def test_recover_with_credential_pool_skips_refresh_on_entitlement_403():
    """Entitlement 403s must bypass ``pool.try_refresh_current()`` entirely.

    Previously an unsubscribed xAI OAuth account kept the agent loop
    spinning forever (refresh → 403 → refresh → 403).  With the
    entitlement guard in place, recovery reports False so the error is
    surfaced normally, carrying the friendly hint produced by
    _summarize_api_error.
    """
    from run_agent import AIAgent
    from agent.error_classifier import FailoverReason

    agent = _make_codex_agent()

    # Every refresh attempt made through the fake pool is appended here.
    refresh_log = []

    class _RecordingPool:
        def try_refresh_current(self):
            refresh_log.append("refresh")
            return MagicMock(id="should_not_be_called")

        def mark_exhausted_and_rotate(self, **_kwargs):
            return None

        def has_available(self):
            return False

    agent._credential_pool = _RecordingPool()

    entitlement_context = {
        "reason": "The caller does not have permission to execute the specified operation",
        "message": (
            "You have either run out of available resources or do not have an "
            "active Grok subscription. Manage at https://grok.com"
        ),
    }

    did_recover, _ = agent._recover_with_credential_pool(
        status_code=403,
        has_retried_429=False,
        classified_reason=FailoverReason.auth,
        error_context=entitlement_context,
    )

    assert did_recover is False, "Entitlement 403 must surface, not silently recover"
    assert len(refresh_log) == 0, "try_refresh_current must NOT be called on entitlement 403"
|
||||||
|
|
||||||
|
|
||||||
|
def test_recover_with_credential_pool_still_refreshes_genuine_auth_failure():
    """Regression guard: a real auth error still goes through exactly one pool refresh."""
    from run_agent import AIAgent
    from agent.error_classifier import FailoverReason

    agent = _make_codex_agent()

    # Every refresh attempt made through the fake pool is appended here.
    refresh_log = []

    class _RecordingPool:
        def try_refresh_current(self):
            refresh_log.append("refresh")
            # Hand back a stand-in entry, i.e. "the refresh worked".
            return MagicMock(id="entry_refreshed")

        def mark_exhausted_and_rotate(self, **_kwargs):
            return None

        def has_available(self):
            return False

    agent._credential_pool = _RecordingPool()
    # The recovery path calls _swap_credential, so replace it with a stub.
    agent._swap_credential = MagicMock()

    auth_context = {
        "reason": "authentication_error",
        "message": "Invalid API key",
    }

    did_recover, _ = agent._recover_with_credential_pool(
        status_code=401,
        has_retried_429=False,
        classified_reason=FailoverReason.auth,
        error_context=auth_context,
    )

    assert did_recover is True, "Genuine auth failure must still recover via refresh"
    assert len(refresh_log) == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fix E: grok-4.3 context length must be 1M, not 256K
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_grok_4_3_context_length_is_1m():
    """grok-4.3 has a 1M context window per docs.x.ai/developers/models/grok-4.3.

    The substring-match fallback previously landed on the "grok-4"
    catch-all and reported 256k, under-reporting the model's capacity.
    """
    from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS

    # A dedicated entry exists and carries the expected value.
    assert DEFAULT_CONTEXT_LENGTHS["grok-4.3"] == 1_000_000

    # Picking the longest key contained in the slug must choose
    # "grok-4.3" for both spellings, not the shorter "grok-4" catch-all.
    for slug in ("grok-4.3", "grok-4.3-latest"):
        lowered = slug.lower()
        candidates = [key for key in DEFAULT_CONTEXT_LENGTHS if key in lowered]
        matched_key = max(candidates, key=len)
        assert matched_key == "grok-4.3", (
            f"Expected longest-first match to land on grok-4.3 for {slug}, "
            f"got {matched_key}"
        )
        assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 1_000_000
|
||||||
|
|
||||||
|
|
||||||
|
def test_grok_4_still_resolves_to_256k():
    """Regression guard: plain grok-4 slugs (not .3) keep the 256k context length."""
    from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS

    for slug in ("grok-4", "grok-4-0709"):
        lowered = slug.lower()
        candidates = [key for key in DEFAULT_CONTEXT_LENGTHS if key in lowered]
        matched_key = max(candidates, key=len)
        # "grok-4-0709" contains "grok-4" but not "grok-4.3", so the
        # winning key is "grok-4" (or a more specific family key if one
        # is ever added).  Either way the 256k contract must hold.
        assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 256_000
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue