fix(minimax-oauth): refresh short-lived access tokens per request (#30619)

* fix(minimax-oauth): refresh short-lived access tokens per request

MiniMax OAuth issues ~15-minute access tokens. The Anthropic SDK caches
api_key as a static string at client construction, so a session that
resolves credentials once at startup keeps sending the same bearer until
MiniMax returns 401 mid-session.

Swap the static string for a callable token provider, reusing the existing
Entra-ID bearer-hook infrastructure in build_anthropic_client. The callable
re-reads auth.json on each invocation and calls _refresh_minimax_oauth_state,
which is a no-op when the token still has more than 60s of life left and
refreshes proactively otherwise. Refreshes persist to auth.json so other
processes (gateway, cron) see them immediately.

The wire-up lives at the agent-init / model-switch boundary rather than in
resolve_runtime_provider, so aux client paths that hand the api_key string
to OpenAI(api_key=...) are unaffected.

* docs: add infographic for minimax-oauth token refresh
This commit is contained in:
Teknium 2026-05-22 15:16:15 -07:00 committed by GitHub
parent 2f320cb35a
commit a84cec61ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 334 additions and 22 deletions

View file

@ -607,6 +607,31 @@ def init_agent(
# Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401).
_is_native_anthropic = agent.provider == "anthropic"
effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "")
# MiniMax OAuth issues short-lived (~15-min) access tokens. The
# Anthropic SDK caches ``api_key`` as a static string at client
# construction time, so a session that resolves the bearer once
# at startup will keep sending the same token until MiniMax
# returns 401 mid-session. Swap the static string for a callable
# token provider — ``build_anthropic_client`` recognizes the
# callable and installs an httpx event hook that mints a fresh
# bearer per outbound request (re-reading auth.json so a refresh
# persisted by another process is visible immediately).
# The cached refresh path is a no-op when the token still has more than
# ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of life left, so the steady-state
# cost is one file read + one timestamp compare per request.
if agent.provider == "minimax-oauth" and isinstance(effective_key, str) and effective_key:
try:
from hermes_cli.auth import build_minimax_oauth_token_provider
effective_key = build_minimax_oauth_token_provider()
except Exception as _mm_exc: # noqa: BLE001 — never block startup on this
import logging as _logging
_logging.getLogger(__name__).warning(
"MiniMax OAuth: failed to install per-request token provider "
"(%s); falling back to static bearer that will expire ~15min in.",
_mm_exc,
)
agent.api_key = effective_key
agent._anthropic_api_key = effective_key
agent._anthropic_base_url = base_url
@ -618,7 +643,7 @@ def init_agent(
# that cause 401/403 on their endpoints. Guards #1739 and
# the third-party identity-injection bug.
from agent.anthropic_adapter import _is_oauth_token as _is_oat
agent._is_anthropic_oauth = _is_oat(effective_key) if _is_native_anthropic else False
agent._is_anthropic_oauth = _is_oat(effective_key) if (_is_native_anthropic and isinstance(effective_key, str)) else False
agent._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout)
# No OpenAI client needed for Anthropic mode
agent.client = None

View file

@ -1352,6 +1352,22 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
# API key — falling back would send Anthropic credentials to third-party endpoints.
_is_native_anthropic = new_provider == "anthropic"
effective_key = (api_key or agent.api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or agent.api_key or "")
# MiniMax OAuth: swap static string for a per-request callable token
# provider so the rebuilt client survives 15-min token expiry. See
# the matching block in agent_init.py for the full rationale.
if new_provider == "minimax-oauth" and isinstance(effective_key, str) and effective_key:
try:
from hermes_cli.auth import build_minimax_oauth_token_provider
effective_key = build_minimax_oauth_token_provider()
except Exception as _mm_exc: # noqa: BLE001
import logging as _logging
_logging.getLogger(__name__).warning(
"MiniMax OAuth: failed to install per-request token provider "
"on switch (%s); using static bearer.",
_mm_exc,
)
agent.api_key = effective_key
agent._anthropic_api_key = effective_key
agent._anthropic_base_url = base_url or getattr(agent, "_anthropic_base_url", None)
@ -1359,7 +1375,7 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
effective_key, agent._anthropic_base_url,
timeout=get_provider_request_timeout(agent.provider, agent.model),
)
agent._is_anthropic_oauth = _is_oauth_token(effective_key) if _is_native_anthropic else False
agent._is_anthropic_oauth = _is_oauth_token(effective_key) if (_is_native_anthropic and isinstance(effective_key, str)) else False
agent.client = None
agent._client_kwargs = {}
else:

View file

@ -7106,10 +7106,95 @@ def _refresh_minimax_oauth_state(
return new_state
def _minimax_oauth_quarantine_on_terminal_refresh(state: Dict[str, Any], exc: AuthError) -> None:
    """Strip unusable MiniMax credentials from ``state`` and persist the result.

    Used by both the eager credential resolver and the lazy per-request
    token provider. Follows the same quarantine approach as the Nous /
    xAI-OAuth / Codex-OAuth providers, so later calls fail fast instead of
    retrying over the network.

    Nothing happens unless ``exc.relogin_required`` is truthy AND ``state``
    still carries a ``refresh_token``. Otherwise ``state`` is mutated in
    place: the token/expiry fields are removed, a ``last_auth_error``
    record describing the failure is attached, and the state is saved.

    Args:
        state: The provider auth state dict; modified in place.
        exc: The refresh failure that triggered the quarantine.
    """
    # Guard clause: only terminal failures with a refresh token on file qualify.
    if not exc.relogin_required or not state.get("refresh_token"):
        return

    dead_fields = ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at")
    for field_name in dead_fields:
        state.pop(field_name, None)

    failure_record = {
        "provider": "minimax-oauth",
        "code": exc.code or "refresh_failed",
        "message": str(exc),
        "reason": "runtime_refresh_failure",
        "relogin_required": True,
        "at": datetime.now(timezone.utc).isoformat(),
    }
    state["last_auth_error"] = failure_record

    # Persisting is best-effort: a failed write is logged at debug level and
    # must not mask the original auth error the caller is about to re-raise.
    try:
        _minimax_save_auth_state(state)
    except Exception as persist_err:
        logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", persist_err)
def build_minimax_oauth_token_provider() -> Callable[[], str]:
    """Build a zero-argument callable that returns a usable MiniMax access token.

    MiniMax issues short-lived access tokens (roughly 15 minutes), while the
    Anthropic SDK stores ``api_key`` as a fixed string when the client is
    constructed. A session that resolves its credentials only once at
    startup therefore keeps sending the same bearer until the server
    answers 401.

    Handing the SDK a *callable* instead plugs into the existing Entra-ID
    bearer infrastructure in :mod:`agent.anthropic_adapter`:
    ``build_anthropic_client`` detects the callable and routes through
    ``_build_anthropic_client_with_bearer_hook``, which sets a fresh
    ``Authorization`` header on every outbound request.

    On every call the returned function re-reads the persisted state from
    ``auth.json`` and passes it to :func:`_refresh_minimax_oauth_state`.
    That helper is a no-op while the token has more than
    ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of life left, so the
    steady-state cost is one file read plus one timestamp compare per
    request. Because state is read fresh each time, a refresh persisted by
    one process (CLI, gateway, cron) is immediately visible to every other
    process sharing the same ``auth.json``.

    Returns:
        A callable taking no arguments and returning the access token.

    Raises:
        AuthError: Raised by the returned callable (not by this builder)
            when no access token is stored, when the refresh fails, or
            when the refreshed state lacks an access token.
    """

    def _current_token() -> str:
        stored = get_provider_auth_state("minimax-oauth")
        if not (stored and stored.get("access_token")):
            raise AuthError(
                "Not logged into MiniMax OAuth. Run `hermes model` and select "
                "MiniMax (OAuth).",
                provider="minimax-oauth", code="not_logged_in", relogin_required=True,
            )

        try:
            refreshed = _refresh_minimax_oauth_state(stored)
        except AuthError as refresh_err:
            # Quarantine the pre-refresh state, then surface the original error.
            _minimax_oauth_quarantine_on_terminal_refresh(stored, refresh_err)
            raise

        bearer = refreshed.get("access_token")
        if bearer:
            return bearer
        raise AuthError(
            "MiniMax OAuth state has no access_token after refresh.",
            provider="minimax-oauth", code="no_access_token", relogin_required=True,
        )

    return _current_token
def resolve_minimax_oauth_runtime_credentials(
*, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
as_token_provider: bool = False,
) -> Dict[str, Any]:
"""Return {provider, api_key, base_url, source} for minimax-oauth."""
"""Return {provider, api_key, base_url, source} for minimax-oauth.
When ``as_token_provider`` is True, ``api_key`` is a zero-arg callable
that mints a fresh access token per call (proactively refreshing if
the cached token is within ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of
expiry). This is an explicit opt-in for callers that need long
sessions to survive MiniMax's short access-token lifetime — see
:func:`build_minimax_oauth_token_provider` for the rationale.
The default (string ``api_key``) preserves the historical contract for
diagnostic call sites like ``hermes status`` that just want to know
whether a valid token exists right now.
"""
state = get_provider_auth_state("minimax-oauth")
if not state or not state.get("access_token"):
raise AuthError(
@ -7120,28 +7205,15 @@ def resolve_minimax_oauth_runtime_credentials(
try:
state = _refresh_minimax_oauth_state(state)
except AuthError as exc:
if exc.relogin_required and state.get("refresh_token"):
# Terminal refresh failure — clear dead tokens from auth.json so
# subsequent calls fail fast without a network retry, mirroring
# the Nous / xAI-OAuth / Codex-OAuth quarantine pattern.
for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
state.pop(_k, None)
state["last_auth_error"] = {
"provider": "minimax-oauth",
"code": exc.code or "refresh_failed",
"message": str(exc),
"reason": "runtime_refresh_failure",
"relogin_required": True,
"at": datetime.now(timezone.utc).isoformat(),
}
try:
_minimax_save_auth_state(state)
except Exception as _save_exc:
logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
_minimax_oauth_quarantine_on_terminal_refresh(state, exc)
raise
if as_token_provider:
api_key: Any = build_minimax_oauth_token_provider()
else:
api_key = state["access_token"]
return {
"provider": "minimax-oauth",
"api_key": state["access_token"],
"api_key": api_key,
"base_url": state["inference_base_url"].rstrip("/"),
"source": "oauth",
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 MiB

View file

@ -642,3 +642,202 @@ def test_generic_auth_status_dispatches_minimax_oauth():
assert status["logged_in"] is True
assert status["provider"] == "minimax-oauth"
assert status["region"] == "global"
# ---------------------------------------------------------------------------
# build_minimax_oauth_token_provider — per-request callable bearer
# ---------------------------------------------------------------------------
# These tests verify the fix for short-lived (~15-min) MiniMax access tokens
# expiring mid-session. The callable is invoked by the Anthropic SDK on every
# outbound request via the existing Entra-style bearer hook.
def test_token_provider_returns_current_access_token_when_fresh():
    """A token far from expiry is handed back as-is, with no HTTP client created."""
    from hermes_cli.auth import build_minimax_oauth_token_provider

    fresh_state = dict(
        access_token="still-fresh",
        refresh_token="rt",
        portal_base_url=MINIMAX_OAUTH_GLOBAL_BASE,
        client_id=MINIMAX_OAUTH_CLIENT_ID,
        inference_base_url=MINIMAX_OAUTH_GLOBAL_INFERENCE,
        expires_at=_future_iso(3600),
    )
    get_token = build_minimax_oauth_token_provider()

    with patch("hermes_cli.auth.get_provider_auth_state", return_value=fresh_state):
        with patch("httpx.Client") as http_client_cls:
            returned = get_token()

    # The token is fresh, so the provider must not have touched the network.
    http_client_cls.assert_not_called()
    assert returned == "still-fresh"
def test_token_provider_refreshes_when_near_expiry():
    """A token inside the skew window makes the callable return a newly minted one."""
    from hermes_cli.auth import build_minimax_oauth_token_provider

    expiring_state = dict(
        access_token="about-to-die",
        refresh_token="rt",
        portal_base_url=MINIMAX_OAUTH_GLOBAL_BASE,
        client_id=MINIMAX_OAUTH_CLIENT_ID,
        inference_base_url=MINIMAX_OAUTH_GLOBAL_INFERENCE,
        expires_at=_future_iso(MINIMAX_OAUTH_REFRESH_SKEW_SECONDS - 1),
    )
    refresh_payload = dict(
        status="success",
        access_token="fresh-bearer",
        refresh_token="rt2",
        expired_in=900,
    )
    refresh_response = _make_httpx_response(200, refresh_payload)
    get_token = build_minimax_oauth_token_provider()

    with patch("hermes_cli.auth.get_provider_auth_state", return_value=expiring_state):
        with patch("httpx.Client") as http_client_cls:
            with patch("hermes_cli.auth._minimax_save_auth_state"):
                # Stand-in client usable as a context manager; its POST
                # yields the canned refresh response.
                fake_client = MagicMock()
                fake_client.__enter__ = MagicMock(return_value=fake_client)
                fake_client.__exit__ = MagicMock(return_value=False)
                fake_client.post.return_value = refresh_response
                http_client_cls.return_value = fake_client

                returned = get_token()

    assert returned == "fresh-bearer"
def test_token_provider_rereads_state_each_call():
    """The callable loads auth state anew on every call.

    This lets a refresh persisted by another hermes process show up on the
    very next invocation.
    """
    from hermes_cli.auth import build_minimax_oauth_token_provider

    expected_tokens = (
        "first-token",
        "second-token-after-another-process-refreshed",
    )
    # One state snapshot per expected read; only the access token differs.
    snapshots = [
        {
            "access_token": access_token,
            "refresh_token": "rt",
            "portal_base_url": MINIMAX_OAUTH_GLOBAL_BASE,
            "client_id": MINIMAX_OAUTH_CLIENT_ID,
            "inference_base_url": MINIMAX_OAUTH_GLOBAL_INFERENCE,
            "expires_at": _future_iso(3600),
        }
        for access_token in expected_tokens
    ]
    get_token = build_minimax_oauth_token_provider()

    with patch("hermes_cli.auth.get_provider_auth_state", side_effect=snapshots):
        token_one = get_token()
        token_two = get_token()

    assert token_one == "first-token"
    assert token_two == "second-token-after-another-process-refreshed"
def test_token_provider_raises_not_logged_in_when_state_missing():
    """With no stored state the callable raises AuthError(not_logged_in, relogin_required=True)."""
    from hermes_cli.auth import build_minimax_oauth_token_provider

    get_token = build_minimax_oauth_token_provider()

    with patch("hermes_cli.auth.get_provider_auth_state", return_value=None):
        with pytest.raises(AuthError) as raised:
            get_token()

    error = raised.value
    assert error.code == "not_logged_in"
    assert error.relogin_required is True
def test_token_provider_quarantines_state_on_terminal_refresh():
    """An ``invalid_grant`` refresh response raises AuthError and saves a wiped state.

    The saved state must have the dead tokens removed so later calls fail
    fast without going to the network.
    """
    from hermes_cli.auth import build_minimax_oauth_token_provider

    expired_state = dict(
        access_token="expired",
        refresh_token="burned-rt",
        portal_base_url=MINIMAX_OAUTH_GLOBAL_BASE,
        client_id=MINIMAX_OAUTH_CLIENT_ID,
        inference_base_url=MINIMAX_OAUTH_GLOBAL_INFERENCE,
        expires_at=_past_iso(100),
    )

    # A 400 response whose body is plain text rather than JSON.
    rejection = _make_httpx_response(400, text="invalid_grant")
    rejection.json.side_effect = Exception("no json")
    rejection.text = "invalid_grant"
    rejection.reason_phrase = "Bad Request"

    saved_states: list[dict] = []

    def _record_save(snapshot):
        # Copy so later mutation of the live state cannot alter the record.
        saved_states.append(dict(snapshot))

    get_token = build_minimax_oauth_token_provider()

    with patch("hermes_cli.auth.get_provider_auth_state", return_value=expired_state):
        with patch("httpx.Client") as http_client_cls:
            with patch("hermes_cli.auth._minimax_save_auth_state", side_effect=_record_save):
                fake_client = MagicMock()
                fake_client.__enter__ = MagicMock(return_value=fake_client)
                fake_client.__exit__ = MagicMock(return_value=False)
                fake_client.post.return_value = rejection
                http_client_cls.return_value = fake_client

                with pytest.raises(AuthError) as raised:
                    get_token()

    assert raised.value.relogin_required is True

    # Exactly one save happened, and it carried the token-free state.
    assert len(saved_states) == 1
    wiped = saved_states[0]
    assert "access_token" not in wiped
    assert "refresh_token" not in wiped
    assert wiped["last_auth_error"]["relogin_required"] is True
def test_resolve_returns_callable_when_as_token_provider_true():
    """Opting in with ``as_token_provider=True`` yields a callable ``api_key``, not a string."""
    valid_state = dict(
        access_token="tok",
        refresh_token="rt",
        portal_base_url=MINIMAX_OAUTH_GLOBAL_BASE,
        client_id=MINIMAX_OAUTH_CLIENT_ID,
        inference_base_url=MINIMAX_OAUTH_GLOBAL_INFERENCE,
        expires_at=_future_iso(3600),
    )

    with patch("hermes_cli.auth.get_provider_auth_state", return_value=valid_state):
        resolved = resolve_minimax_oauth_runtime_credentials(as_token_provider=True)

    api_key = resolved["api_key"]
    assert callable(api_key)
    assert not isinstance(api_key, str)
    assert resolved["base_url"] == MINIMAX_OAUTH_GLOBAL_INFERENCE.rstrip("/")
def test_resolve_returns_string_by_default():
    """Without opting in, ``api_key`` stays a plain string holding the stored token."""
    valid_state = dict(
        access_token="tok",
        refresh_token="rt",
        portal_base_url=MINIMAX_OAUTH_GLOBAL_BASE,
        client_id=MINIMAX_OAUTH_CLIENT_ID,
        inference_base_url=MINIMAX_OAUTH_GLOBAL_INFERENCE,
        expires_at=_future_iso(3600),
    )

    with patch("hermes_cli.auth.get_provider_auth_state", return_value=valid_state):
        resolved = resolve_minimax_oauth_runtime_credentials()

    api_key = resolved["api_key"]
    assert api_key == "tok"
    assert isinstance(api_key, str)