mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(minimax-oauth): quarantine dead tokens on terminal refresh failure
resolve_minimax_oauth_runtime_credentials called _refresh_minimax_oauth_state without a try/except, so a terminal failure (invalid_grant, refresh_token_reused, invalid_refresh_token) raised AuthError but left the dead refresh_token in auth.json. Every subsequent API call retried the same token via a network round-trip, failing identically each time. Fix: wrap the refresh call and, when exc.relogin_required is True and a refresh_token is present, clear the dead OAuth fields (access_token, refresh_token, expires_*) and write a last_auth_error quarantine marker to auth.json before re-raising. The next call sees no access_token and fails fast with 'not_logged_in' — no network retry — and the user is prompted to re-authenticate. Mirrors the existing quarantine pattern for Nous (_quarantine_nous_oauth_state), xAI-OAuth (#28116), and Codex-OAuth (#28118). Persist failure is best-effort (logged at DEBUG, error still re-raised). Salvaged from #28003 by @EloquentBrush0x — contributor's branch was severely stale (would have reverted ~5000 LOC across azure/kanban/i18n subsystems); fix re-applied surgically with their pattern preserved and added two regression tests (terminal-quarantines + transient-does-not-quarantine).
This commit is contained in:
parent
b570e0fdd0
commit
d9331eecee
2 changed files with 126 additions and 1 deletions
|
|
@ -6788,7 +6788,28 @@ def resolve_minimax_oauth_runtime_credentials(
|
|||
"MiniMax (OAuth).",
|
||||
provider="minimax-oauth", code="not_logged_in", relogin_required=True,
|
||||
)
|
||||
state = _refresh_minimax_oauth_state(state)
|
||||
try:
|
||||
state = _refresh_minimax_oauth_state(state)
|
||||
except AuthError as exc:
|
||||
if exc.relogin_required and state.get("refresh_token"):
|
||||
# Terminal refresh failure — clear dead tokens from auth.json so
|
||||
# subsequent calls fail fast without a network retry, mirroring
|
||||
# the Nous / xAI-OAuth / Codex-OAuth quarantine pattern.
|
||||
for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
|
||||
state.pop(_k, None)
|
||||
state["last_auth_error"] = {
|
||||
"provider": "minimax-oauth",
|
||||
"code": exc.code or "refresh_failed",
|
||||
"message": str(exc),
|
||||
"reason": "runtime_refresh_failure",
|
||||
"relogin_required": True,
|
||||
"at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
try:
|
||||
_minimax_save_auth_state(state)
|
||||
except Exception as _save_exc:
|
||||
logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
|
||||
raise
|
||||
return {
|
||||
"provider": "minimax-oauth",
|
||||
"api_key": state["access_token"],
|
||||
|
|
|
|||
|
|
@ -469,6 +469,110 @@ def test_resolve_credentials_requires_login():
|
|||
assert exc_info.value.relogin_required is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 11b. Terminal refresh failure quarantines dead tokens (#28003)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_resolve_credentials_quarantines_dead_tokens_on_terminal_refresh_failure():
|
||||
"""Terminal refresh failure (relogin_required + refresh_token present) must
|
||||
clear access_token/refresh_token/expires_* from auth.json and write a
|
||||
last_auth_error marker, so subsequent calls fail fast with not_logged_in
|
||||
instead of replaying the dead refresh token over the network.
|
||||
Mirrors Nous / xAI-OAuth / Codex-OAuth quarantine pattern.
|
||||
"""
|
||||
stale_state = {
|
||||
"access_token": "dead-access-token",
|
||||
"refresh_token": "dead-refresh-token",
|
||||
"expires_at": "2026-01-01T00:00:00Z",
|
||||
"expires_in": 3600,
|
||||
"obtained_at": "2026-01-01T00:00:00Z",
|
||||
"inference_base_url": "https://api.minimax.io/v1",
|
||||
"portal_base_url": "https://portal.minimax.io",
|
||||
"client_id": "test-client",
|
||||
"region": "global",
|
||||
}
|
||||
saved_states = []
|
||||
|
||||
def _capture_save(s):
|
||||
saved_states.append(dict(s))
|
||||
|
||||
def _terminal_refresh(_state):
|
||||
raise AuthError(
|
||||
"invalid_grant",
|
||||
provider="minimax-oauth",
|
||||
code="invalid_grant",
|
||||
relogin_required=True,
|
||||
)
|
||||
|
||||
with patch("hermes_cli.auth.get_provider_auth_state", return_value=stale_state), \
|
||||
patch("hermes_cli.auth._refresh_minimax_oauth_state", side_effect=_terminal_refresh), \
|
||||
patch("hermes_cli.auth._minimax_save_auth_state", side_effect=_capture_save):
|
||||
with pytest.raises(AuthError) as exc_info:
|
||||
resolve_minimax_oauth_runtime_credentials()
|
||||
|
||||
# The original AuthError is re-raised so callers get the right error surface.
|
||||
assert exc_info.value.code == "invalid_grant"
|
||||
assert exc_info.value.relogin_required is True
|
||||
|
||||
# A quarantine save must have happened.
|
||||
assert len(saved_states) == 1
|
||||
quarantined = saved_states[0]
|
||||
|
||||
# Dead OAuth fields cleared.
|
||||
assert "access_token" not in quarantined
|
||||
assert "refresh_token" not in quarantined
|
||||
assert "expires_at" not in quarantined
|
||||
assert "expires_in" not in quarantined
|
||||
assert "obtained_at" not in quarantined
|
||||
|
||||
# Routing/identity metadata preserved.
|
||||
assert quarantined["inference_base_url"] == "https://api.minimax.io/v1"
|
||||
assert quarantined["portal_base_url"] == "https://portal.minimax.io"
|
||||
assert quarantined["client_id"] == "test-client"
|
||||
assert quarantined["region"] == "global"
|
||||
|
||||
# Structured diagnostic blob written.
|
||||
err = quarantined.get("last_auth_error")
|
||||
assert isinstance(err, dict)
|
||||
assert err["provider"] == "minimax-oauth"
|
||||
assert err["code"] == "invalid_grant"
|
||||
assert err["reason"] == "runtime_refresh_failure"
|
||||
assert err["relogin_required"] is True
|
||||
assert "at" in err
|
||||
|
||||
|
||||
def test_resolve_credentials_does_not_quarantine_on_transient_refresh_failure():
|
||||
"""When refresh raises with relogin_required=False (e.g. 429 / 5xx), the
|
||||
dead-token quarantine path must NOT fire — tokens stay on disk for the
|
||||
next attempt.
|
||||
"""
|
||||
stale_state = {
|
||||
"access_token": "still-good-access-token",
|
||||
"refresh_token": "still-good-refresh-token",
|
||||
"expires_at": "2026-01-01T00:00:00Z",
|
||||
"inference_base_url": "https://api.minimax.io/v1",
|
||||
}
|
||||
saved_states = []
|
||||
|
||||
def _transient_refresh(_state):
|
||||
raise AuthError(
|
||||
"service unavailable",
|
||||
provider="minimax-oauth",
|
||||
code="refresh_failed",
|
||||
relogin_required=False,
|
||||
)
|
||||
|
||||
with patch("hermes_cli.auth.get_provider_auth_state", return_value=stale_state), \
|
||||
patch("hermes_cli.auth._refresh_minimax_oauth_state", side_effect=_transient_refresh), \
|
||||
patch("hermes_cli.auth._minimax_save_auth_state", side_effect=lambda s: saved_states.append(dict(s))):
|
||||
with pytest.raises(AuthError) as exc_info:
|
||||
resolve_minimax_oauth_runtime_credentials()
|
||||
|
||||
assert exc_info.value.relogin_required is False
|
||||
# No quarantine save should have happened.
|
||||
assert saved_states == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 12. test_provider_registry_contains_minimax_oauth
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue