fix(minimax-oauth): refresh short-lived access tokens per request (#30619)

* fix(minimax-oauth): refresh short-lived access tokens per request

MiniMax OAuth issues ~15-minute access tokens. The Anthropic SDK caches
api_key as a static string at client construction, so a session that
resolves credentials once at startup keeps sending the same bearer until
MiniMax returns 401 mid-session.

Swap the static string for a callable token provider, reusing the existing
Entra-ID bearer-hook infrastructure in build_anthropic_client. The callable
re-reads auth.json on each invocation and calls _refresh_minimax_oauth_state,
which is a no-op when the token still has more than 60s of life left and
refreshes proactively otherwise. Refreshes persist to auth.json so other
processes (gateway, cron) see them immediately.

The wire-up lives at the agent-init / model-switch boundary rather than in
resolve_runtime_provider, so aux client paths that hand the api_key string
to OpenAI(api_key=...) are unaffected.

* docs: add infographic for minimax-oauth token refresh
This commit is contained in:
Teknium 2026-05-22 15:16:15 -07:00 committed by GitHub
parent 2f320cb35a
commit a84cec61ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 334 additions and 22 deletions

View file

@ -7106,10 +7106,95 @@ def _refresh_minimax_oauth_state(
return new_state
def _minimax_oauth_quarantine_on_terminal_refresh(state: Dict[str, Any], exc: AuthError) -> None:
"""Wipe dead tokens from auth.json after a terminal refresh failure.
Shared by both the eager-resolve path and the lazy per-request token
provider. Mirrors the Nous / xAI-OAuth / Codex-OAuth quarantine pattern
so subsequent calls fail fast without a network retry.
"""
if not (exc.relogin_required and state.get("refresh_token")):
return
for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
state.pop(_k, None)
state["last_auth_error"] = {
"provider": "minimax-oauth",
"code": exc.code or "refresh_failed",
"message": str(exc),
"reason": "runtime_refresh_failure",
"relogin_required": True,
"at": datetime.now(timezone.utc).isoformat(),
}
try:
_minimax_save_auth_state(state)
except Exception as _save_exc:
logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
def build_minimax_oauth_token_provider() -> Callable[[], str]:
"""Return a zero-arg callable that yields a fresh MiniMax access token.
The Anthropic SDK caches ``api_key`` as a static string at construction
time, so a session that resolves credentials once at startup will keep
sending the same bearer until MiniMax's server returns 401 — typically
~15 minutes in, because MiniMax issues short-lived access tokens.
Returning a *callable* instead of a string lets us hook into the
existing Entra-ID bearer infrastructure in
:mod:`agent.anthropic_adapter`: ``build_anthropic_client`` detects a
callable and routes through ``_build_anthropic_client_with_bearer_hook``,
which mints a fresh ``Authorization`` header on every outbound request.
Each invocation re-reads the persisted state from ``auth.json`` and
calls :func:`_refresh_minimax_oauth_state` that helper is a no-op
when the token still has more than ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS``
of life left, so the steady-state cost is one file read + one
timestamp compare per request.
Reading state fresh each time also means a refresh persisted by one
process (CLI, gateway, cron) is immediately visible to every other
process sharing the same ``auth.json``.
"""
def _provide() -> str:
state = get_provider_auth_state("minimax-oauth")
if not state or not state.get("access_token"):
raise AuthError(
"Not logged into MiniMax OAuth. Run `hermes model` and select "
"MiniMax (OAuth).",
provider="minimax-oauth", code="not_logged_in", relogin_required=True,
)
try:
state = _refresh_minimax_oauth_state(state)
except AuthError as exc:
_minimax_oauth_quarantine_on_terminal_refresh(state, exc)
raise
token = state.get("access_token")
if not token:
raise AuthError(
"MiniMax OAuth state has no access_token after refresh.",
provider="minimax-oauth", code="no_access_token", relogin_required=True,
)
return token
return _provide
def resolve_minimax_oauth_runtime_credentials(
*, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
as_token_provider: bool = False,
) -> Dict[str, Any]:
"""Return {provider, api_key, base_url, source} for minimax-oauth."""
"""Return {provider, api_key, base_url, source} for minimax-oauth.
When ``as_token_provider`` is True, ``api_key`` is a zero-arg callable
that mints a fresh access token per call (proactively refreshing if
the cached token is within ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of
expiry). This is what the runtime provider path uses so that long
sessions survive MiniMax's short access-token lifetime — see
:func:`build_minimax_oauth_token_provider` for the rationale.
The default (string ``api_key``) preserves the historical contract for
diagnostic call sites like ``hermes status`` that just want to know
whether a valid token exists right now.
"""
state = get_provider_auth_state("minimax-oauth")
if not state or not state.get("access_token"):
raise AuthError(
@ -7120,28 +7205,15 @@ def resolve_minimax_oauth_runtime_credentials(
try:
state = _refresh_minimax_oauth_state(state)
except AuthError as exc:
if exc.relogin_required and state.get("refresh_token"):
# Terminal refresh failure — clear dead tokens from auth.json so
# subsequent calls fail fast without a network retry, mirroring
# the Nous / xAI-OAuth / Codex-OAuth quarantine pattern.
for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
state.pop(_k, None)
state["last_auth_error"] = {
"provider": "minimax-oauth",
"code": exc.code or "refresh_failed",
"message": str(exc),
"reason": "runtime_refresh_failure",
"relogin_required": True,
"at": datetime.now(timezone.utc).isoformat(),
}
try:
_minimax_save_auth_state(state)
except Exception as _save_exc:
logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
_minimax_oauth_quarantine_on_terminal_refresh(state, exc)
raise
if as_token_provider:
api_key: Any = build_minimax_oauth_token_provider()
else:
api_key = state["access_token"]
return {
"provider": "minimax-oauth",
"api_key": state["access_token"],
"api_key": api_key,
"base_url": state["inference_base_url"].rstrip("/"),
"source": "oauth",
}