fix(minimax-oauth): quarantine dead tokens on terminal refresh failure

resolve_minimax_oauth_runtime_credentials called _refresh_minimax_oauth_state
without a try/except, so a terminal failure (invalid_grant,
refresh_token_reused, invalid_refresh_token) raised AuthError but left
the dead refresh_token in auth.json. Every subsequent API call retried
the same token via a network round-trip, failing identically each time.

Fix: wrap the refresh call and, when exc.relogin_required is True and a
refresh_token is present, clear the dead OAuth fields (access_token,
refresh_token, expires_*) and write a last_auth_error quarantine marker
to auth.json before re-raising. The next call sees no access_token and
fails fast with 'not_logged_in' — no network retry — and the user is
prompted to re-authenticate.

Mirrors the existing quarantine pattern for Nous (_quarantine_nous_oauth_state),
xAI-OAuth (#28116), and Codex-OAuth (#28118). Persist failure is
best-effort (logged at DEBUG, error still re-raised).

Salvaged from #28003 by @EloquentBrush0x — contributor's branch was
severely stale (would have reverted ~5000 LOC across azure/kanban/i18n
subsystems); fix re-applied surgically with their pattern preserved and
added two regression tests (terminal-quarantines + transient-does-not-quarantine).
This commit is contained in:
EloquentBrush0x 2026-05-18 10:33:36 -07:00 committed by Teknium
parent b570e0fdd0
commit d9331eecee
2 changed files with 126 additions and 1 deletions

View file

@ -6788,7 +6788,28 @@ def resolve_minimax_oauth_runtime_credentials(
"MiniMax (OAuth).",
provider="minimax-oauth", code="not_logged_in", relogin_required=True,
)
state = _refresh_minimax_oauth_state(state)
try:
state = _refresh_minimax_oauth_state(state)
except AuthError as exc:
if exc.relogin_required and state.get("refresh_token"):
# Terminal refresh failure — clear dead tokens from auth.json so
# subsequent calls fail fast without a network retry, mirroring
# the Nous / xAI-OAuth / Codex-OAuth quarantine pattern.
for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
state.pop(_k, None)
state["last_auth_error"] = {
"provider": "minimax-oauth",
"code": exc.code or "refresh_failed",
"message": str(exc),
"reason": "runtime_refresh_failure",
"relogin_required": True,
"at": datetime.now(timezone.utc).isoformat(),
}
try:
_minimax_save_auth_state(state)
except Exception as _save_exc:
logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
raise
return {
"provider": "minimax-oauth",
"api_key": state["access_token"],

View file

@ -469,6 +469,110 @@ def test_resolve_credentials_requires_login():
assert exc_info.value.relogin_required is True
# ---------------------------------------------------------------------------
# 11b. Terminal refresh failure quarantines dead tokens (#28003)
# ---------------------------------------------------------------------------
def test_resolve_credentials_quarantines_dead_tokens_on_terminal_refresh_failure():
"""Terminal refresh failure (relogin_required + refresh_token present) must
clear access_token/refresh_token/expires_* from auth.json and write a
last_auth_error marker, so subsequent calls fail fast with not_logged_in
instead of replaying the dead refresh token over the network.
Mirrors Nous / xAI-OAuth / Codex-OAuth quarantine pattern.
"""
stale_state = {
"access_token": "dead-access-token",
"refresh_token": "dead-refresh-token",
"expires_at": "2026-01-01T00:00:00Z",
"expires_in": 3600,
"obtained_at": "2026-01-01T00:00:00Z",
"inference_base_url": "https://api.minimax.io/v1",
"portal_base_url": "https://portal.minimax.io",
"client_id": "test-client",
"region": "global",
}
saved_states = []
def _capture_save(s):
saved_states.append(dict(s))
def _terminal_refresh(_state):
raise AuthError(
"invalid_grant",
provider="minimax-oauth",
code="invalid_grant",
relogin_required=True,
)
with patch("hermes_cli.auth.get_provider_auth_state", return_value=stale_state), \
patch("hermes_cli.auth._refresh_minimax_oauth_state", side_effect=_terminal_refresh), \
patch("hermes_cli.auth._minimax_save_auth_state", side_effect=_capture_save):
with pytest.raises(AuthError) as exc_info:
resolve_minimax_oauth_runtime_credentials()
# The original AuthError is re-raised so callers get the right error surface.
assert exc_info.value.code == "invalid_grant"
assert exc_info.value.relogin_required is True
# A quarantine save must have happened.
assert len(saved_states) == 1
quarantined = saved_states[0]
# Dead OAuth fields cleared.
assert "access_token" not in quarantined
assert "refresh_token" not in quarantined
assert "expires_at" not in quarantined
assert "expires_in" not in quarantined
assert "obtained_at" not in quarantined
# Routing/identity metadata preserved.
assert quarantined["inference_base_url"] == "https://api.minimax.io/v1"
assert quarantined["portal_base_url"] == "https://portal.minimax.io"
assert quarantined["client_id"] == "test-client"
assert quarantined["region"] == "global"
# Structured diagnostic blob written.
err = quarantined.get("last_auth_error")
assert isinstance(err, dict)
assert err["provider"] == "minimax-oauth"
assert err["code"] == "invalid_grant"
assert err["reason"] == "runtime_refresh_failure"
assert err["relogin_required"] is True
assert "at" in err
def test_resolve_credentials_does_not_quarantine_on_transient_refresh_failure():
"""When refresh raises with relogin_required=False (e.g. 429 / 5xx), the
dead-token quarantine path must NOT fire tokens stay on disk for the
next attempt.
"""
stale_state = {
"access_token": "still-good-access-token",
"refresh_token": "still-good-refresh-token",
"expires_at": "2026-01-01T00:00:00Z",
"inference_base_url": "https://api.minimax.io/v1",
}
saved_states = []
def _transient_refresh(_state):
raise AuthError(
"service unavailable",
provider="minimax-oauth",
code="refresh_failed",
relogin_required=False,
)
with patch("hermes_cli.auth.get_provider_auth_state", return_value=stale_state), \
patch("hermes_cli.auth._refresh_minimax_oauth_state", side_effect=_transient_refresh), \
patch("hermes_cli.auth._minimax_save_auth_state", side_effect=lambda s: saved_states.append(dict(s))):
with pytest.raises(AuthError) as exc_info:
resolve_minimax_oauth_runtime_credentials()
assert exc_info.value.relogin_required is False
# No quarantine save should have happened.
assert saved_states == []
# ---------------------------------------------------------------------------
# 12. test_provider_registry_contains_minimax_oauth
# ---------------------------------------------------------------------------