fix(xai-oauth): quarantine dead tokens on terminal refresh failure

resolve_xai_oauth_runtime_credentials() called _refresh_xai_oauth_tokens()
with no try/except. A terminal refresh failure (HTTP 400/401/403 —
invalid_grant, token revoked) propagated without clearing the dead
access_token / refresh_token from auth.json, causing every subsequent
session to retry the same doomed network request.

Add a try/except around the refresh call that mirrors the existing
credential_pool.py quarantine: when _is_terminal_xai_oauth_refresh_error
identifies a non-retryable failure, clear the dead token fields from
auth.json and write a last_auth_error diagnostic marker so future calls
fail fast with a clear relogin_required error instead of hitting the
network.

active_provider is preserved (set_active=False) so multi-provider users
whose chosen provider is not xai-oauth are unaffected.

Tests: two new cases in test_auth_xai_oauth_provider.py cover terminal
quarantine and transient pass-through.
This commit is contained in:
EloquentBrush0x 2026-05-18 22:53:51 +03:00 committed by Teknium
parent 7321b3c2db
commit b3e714e8b7
2 changed files with 152 additions and 7 deletions

View file

@@ -3546,13 +3546,41 @@ def resolve_xai_oauth_runtime_credentials(
if should_refresh:
if not token_endpoint:
token_endpoint = _xai_oauth_discovery(refresh_timeout_seconds)["token_endpoint"]
tokens = _refresh_xai_oauth_tokens(
tokens,
token_endpoint=token_endpoint,
redirect_uri=redirect_uri,
timeout_seconds=refresh_timeout_seconds,
)
access_token = str(tokens.get("access_token", "") or "").strip()
try:
tokens = _refresh_xai_oauth_tokens(
tokens,
token_endpoint=token_endpoint,
redirect_uri=redirect_uri,
timeout_seconds=refresh_timeout_seconds,
)
access_token = str(tokens.get("access_token", "") or "").strip()
except AuthError as exc:
if _is_terminal_xai_oauth_refresh_error(exc):
# Terminal failure (HTTP 400/401/403 — invalid_grant, token revoked).
# Clear dead tokens from auth.json so subsequent sessions fail fast
# without a network retry. Mirrors credential_pool.py quarantine.
try:
_q_store = _load_auth_store()
_q_state = _load_provider_state(_q_store, "xai-oauth") or {}
_q_tokens = dict(_q_state.get("tokens") or {})
_q_tokens.pop("access_token", None)
_q_tokens.pop("refresh_token", None)
_q_state["tokens"] = _q_tokens
_q_state["last_auth_error"] = {
"provider": "xai-oauth",
"code": exc.code or "xai_refresh_failed",
"message": str(exc),
"reason": "runtime_refresh_failure",
"relogin_required": True,
"at": datetime.now(timezone.utc).isoformat(),
}
_store_provider_state(_q_store, "xai-oauth", _q_state, set_active=False)
_save_auth_store(_q_store)
except Exception as _save_exc:
logger.debug(
"xAI OAuth: failed to persist quarantined state: %s", _save_exc,
)
raise
base_url = (
os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/")