mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(minimax-oauth): refresh short-lived access tokens per request (#30619)
* fix(minimax-oauth): refresh short-lived access tokens per request MiniMax OAuth issues ~15-minute access tokens. The Anthropic SDK caches api_key as a static string at client construction, so a session that resolves credentials once at startup keeps sending the same bearer until MiniMax returns 401 mid-session. Swap the static string for a callable token provider, reusing the existing Entra-ID bearer-hook infrastructure in build_anthropic_client. The callable re-reads auth.json on each invocation and calls _refresh_minimax_oauth_state, which is a no-op when the token still has more than 60s of life left and refreshes proactively otherwise. Refreshes persist to auth.json so other processes (gateway, cron) see them immediately. The wire-up lives at the agent-init / model-switch boundary rather than in resolve_runtime_provider, so aux client paths that hand the api_key string to OpenAI(api_key=...) are unaffected. * docs: add infographic for minimax-oauth token refresh
This commit is contained in:
parent
2f320cb35a
commit
a84cec61ca
5 changed files with 334 additions and 22 deletions
|
|
@ -607,6 +607,31 @@ def init_agent(
|
|||
# Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401).
|
||||
_is_native_anthropic = agent.provider == "anthropic"
|
||||
effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "")
|
||||
|
||||
# MiniMax OAuth issues short-lived (~15-min) access tokens. The
|
||||
# Anthropic SDK caches ``api_key`` as a static string at client
|
||||
# construction time, so a session that resolves the bearer once
|
||||
# at startup will keep sending the same token until MiniMax
|
||||
# returns 401 mid-session. Swap the static string for a callable
|
||||
# token provider — ``build_anthropic_client`` recognizes the
|
||||
# callable and installs an httpx event hook that mints a fresh
|
||||
# bearer per outbound request (re-reading auth.json so a refresh
|
||||
# persisted by another process is visible immediately).
|
||||
# The cached refresh path is a no-op when the token still has more than
|
||||
# ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of life left, so steady-
|
||||
# state cost is one file read + one timestamp compare per request.
|
||||
if agent.provider == "minimax-oauth" and isinstance(effective_key, str) and effective_key:
|
||||
try:
|
||||
from hermes_cli.auth import build_minimax_oauth_token_provider
|
||||
effective_key = build_minimax_oauth_token_provider()
|
||||
except Exception as _mm_exc: # noqa: BLE001 — never block startup on this
|
||||
import logging as _logging
|
||||
_logging.getLogger(__name__).warning(
|
||||
"MiniMax OAuth: failed to install per-request token provider "
|
||||
"(%s); falling back to static bearer that will expire ~15min in.",
|
||||
_mm_exc,
|
||||
)
|
||||
|
||||
agent.api_key = effective_key
|
||||
agent._anthropic_api_key = effective_key
|
||||
agent._anthropic_base_url = base_url
|
||||
|
|
@ -618,7 +643,7 @@ def init_agent(
|
|||
# that cause 401/403 on their endpoints. Guards #1739 and
|
||||
# the third-party identity-injection bug.
|
||||
from agent.anthropic_adapter import _is_oauth_token as _is_oat
|
||||
agent._is_anthropic_oauth = _is_oat(effective_key) if _is_native_anthropic else False
|
||||
agent._is_anthropic_oauth = _is_oat(effective_key) if (_is_native_anthropic and isinstance(effective_key, str)) else False
|
||||
agent._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout)
|
||||
# No OpenAI client needed for Anthropic mode
|
||||
agent.client = None
|
||||
|
|
|
|||
|
|
@ -1352,6 +1352,22 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
|||
# API key — falling back would send Anthropic credentials to third-party endpoints.
|
||||
_is_native_anthropic = new_provider == "anthropic"
|
||||
effective_key = (api_key or agent.api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or agent.api_key or "")
|
||||
|
||||
# MiniMax OAuth: swap static string for a per-request callable token
|
||||
# provider so the rebuilt client survives 15-min token expiry. See
|
||||
# the matching block in agent_init.py for the full rationale.
|
||||
if new_provider == "minimax-oauth" and isinstance(effective_key, str) and effective_key:
|
||||
try:
|
||||
from hermes_cli.auth import build_minimax_oauth_token_provider
|
||||
effective_key = build_minimax_oauth_token_provider()
|
||||
except Exception as _mm_exc: # noqa: BLE001
|
||||
import logging as _logging
|
||||
_logging.getLogger(__name__).warning(
|
||||
"MiniMax OAuth: failed to install per-request token provider "
|
||||
"on switch (%s); using static bearer.",
|
||||
_mm_exc,
|
||||
)
|
||||
|
||||
agent.api_key = effective_key
|
||||
agent._anthropic_api_key = effective_key
|
||||
agent._anthropic_base_url = base_url or getattr(agent, "_anthropic_base_url", None)
|
||||
|
|
@ -1359,7 +1375,7 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
|||
effective_key, agent._anthropic_base_url,
|
||||
timeout=get_provider_request_timeout(agent.provider, agent.model),
|
||||
)
|
||||
agent._is_anthropic_oauth = _is_oauth_token(effective_key) if _is_native_anthropic else False
|
||||
agent._is_anthropic_oauth = _is_oauth_token(effective_key) if (_is_native_anthropic and isinstance(effective_key, str)) else False
|
||||
agent.client = None
|
||||
agent._client_kwargs = {}
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -7106,10 +7106,95 @@ def _refresh_minimax_oauth_state(
|
|||
return new_state
|
||||
|
||||
|
||||
def _minimax_oauth_quarantine_on_terminal_refresh(state: Dict[str, Any], exc: AuthError) -> None:
    """Purge unusable MiniMax credentials after a refresh has failed for good.

    Used by the eager credential resolver and by the lazy per-request token
    provider alike. It follows the same quarantine approach as the Nous /
    xAI-OAuth / Codex-OAuth flows: once the dead tokens are gone from
    auth.json, later calls fail fast instead of retrying over the network.

    Args:
        state: The persisted ``minimax-oauth`` auth state. Mutated in place.
        exc: The error raised by the refresh attempt.

    Nothing happens unless ``exc.relogin_required`` is truthy and ``state``
    still holds a ``refresh_token``.
    """
    if not exc.relogin_required or not state.get("refresh_token"):
        return

    dead_fields = ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at")
    for field_name in dead_fields:
        state.pop(field_name, None)

    # Leave a breadcrumb explaining why the credentials were dropped.
    error_record = {
        "provider": "minimax-oauth",
        "code": exc.code or "refresh_failed",
        "message": str(exc),
        "reason": "runtime_refresh_failure",
        "relogin_required": True,
        "at": datetime.now(timezone.utc).isoformat(),
    }
    state["last_auth_error"] = error_record

    # Persisting is best-effort: a failed write is only logged so the
    # caller's original refresh error is what propagates.
    try:
        _minimax_save_auth_state(state)
    except Exception as save_error:
        logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", save_error)
|
||||
|
||||
|
||||
def build_minimax_oauth_token_provider() -> Callable[[], str]:
    """Build a zero-argument callable that returns a usable MiniMax bearer.

    MiniMax hands out short-lived access tokens (~15 minutes), while the
    Anthropic SDK stores ``api_key`` as a fixed string when the client is
    constructed. A session that resolves its credentials once at startup
    would therefore keep sending the same bearer until the server answers
    with 401.

    Passing a *callable* instead of a string plugs into the existing
    Entra-ID bearer support in :mod:`agent.anthropic_adapter`:
    ``build_anthropic_client`` notices the callable and goes through
    ``_build_anthropic_client_with_bearer_hook``, which sets a fresh
    ``Authorization`` header on each outbound request.

    Every call of the returned function loads the persisted state from
    ``auth.json`` and hands it to :func:`_refresh_minimax_oauth_state`.
    That helper does nothing while the token has more than
    ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` left, so the steady-state cost is
    one file read plus one timestamp comparison per request.

    Because the state is re-read every time, a refresh written by one
    process (CLI, gateway, cron) is picked up right away by any other
    process that shares the same ``auth.json``.

    Returns:
        A callable producing the current access token string.

    Raises (from the returned callable):
        AuthError: when no login state exists, when refreshing fails, or
            when the refreshed state carries no ``access_token``.
    """

    def _provide() -> str:
        auth_state = get_provider_auth_state("minimax-oauth")
        if not (auth_state and auth_state.get("access_token")):
            raise AuthError(
                "Not logged into MiniMax OAuth. Run `hermes model` and select "
                "MiniMax (OAuth).",
                provider="minimax-oauth",
                code="not_logged_in",
                relogin_required=True,
            )

        try:
            refreshed = _refresh_minimax_oauth_state(auth_state)
        except AuthError as refresh_error:
            # Quarantine works on the state we loaded, since the refresh
            # never produced a replacement.
            _minimax_oauth_quarantine_on_terminal_refresh(auth_state, refresh_error)
            raise

        bearer = refreshed.get("access_token")
        if bearer:
            return bearer
        raise AuthError(
            "MiniMax OAuth state has no access_token after refresh.",
            provider="minimax-oauth",
            code="no_access_token",
            relogin_required=True,
        )

    return _provide
|
||||
|
||||
|
||||
def resolve_minimax_oauth_runtime_credentials(
|
||||
*, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
|
||||
as_token_provider: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Return {provider, api_key, base_url, source} for minimax-oauth."""
|
||||
"""Return {provider, api_key, base_url, source} for minimax-oauth.
|
||||
|
||||
When ``as_token_provider`` is True, ``api_key`` is a zero-arg callable
|
||||
that mints a fresh access token per call (proactively refreshing if
|
||||
the cached token is within ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of
|
||||
expiry). This is what the runtime provider path uses so that long
|
||||
sessions survive MiniMax's short access-token lifetime — see
|
||||
:func:`build_minimax_oauth_token_provider` for the rationale.
|
||||
|
||||
The default (string ``api_key``) preserves the historical contract for
|
||||
diagnostic call sites like ``hermes status`` that just want to know
|
||||
whether a valid token exists right now.
|
||||
"""
|
||||
state = get_provider_auth_state("minimax-oauth")
|
||||
if not state or not state.get("access_token"):
|
||||
raise AuthError(
|
||||
|
|
@ -7120,28 +7205,15 @@ def resolve_minimax_oauth_runtime_credentials(
|
|||
try:
|
||||
state = _refresh_minimax_oauth_state(state)
|
||||
except AuthError as exc:
|
||||
if exc.relogin_required and state.get("refresh_token"):
|
||||
# Terminal refresh failure — clear dead tokens from auth.json so
|
||||
# subsequent calls fail fast without a network retry, mirroring
|
||||
# the Nous / xAI-OAuth / Codex-OAuth quarantine pattern.
|
||||
for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
|
||||
state.pop(_k, None)
|
||||
state["last_auth_error"] = {
|
||||
"provider": "minimax-oauth",
|
||||
"code": exc.code or "refresh_failed",
|
||||
"message": str(exc),
|
||||
"reason": "runtime_refresh_failure",
|
||||
"relogin_required": True,
|
||||
"at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
try:
|
||||
_minimax_save_auth_state(state)
|
||||
except Exception as _save_exc:
|
||||
logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
|
||||
_minimax_oauth_quarantine_on_terminal_refresh(state, exc)
|
||||
raise
|
||||
if as_token_provider:
|
||||
api_key: Any = build_minimax_oauth_token_provider()
|
||||
else:
|
||||
api_key = state["access_token"]
|
||||
return {
|
||||
"provider": "minimax-oauth",
|
||||
"api_key": state["access_token"],
|
||||
"api_key": api_key,
|
||||
"base_url": state["inference_base_url"].rstrip("/"),
|
||||
"source": "oauth",
|
||||
}
|
||||
|
|
|
|||
BIN
infographic/minimax-oauth-token-refresh/infographic.png
Normal file
BIN
infographic/minimax-oauth-token-refresh/infographic.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.1 MiB |
|
|
@ -642,3 +642,202 @@ def test_generic_auth_status_dispatches_minimax_oauth():
|
|||
assert status["logged_in"] is True
|
||||
assert status["provider"] == "minimax-oauth"
|
||||
assert status["region"] == "global"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_minimax_oauth_token_provider — per-request callable bearer
|
||||
# ---------------------------------------------------------------------------
|
||||
# These tests verify the fix for short-lived (~15-min) MiniMax access tokens
|
||||
# expiring mid-session. The callable is invoked by the Anthropic SDK on every
|
||||
# outbound request via the existing Entra-style bearer hook.
|
||||
|
||||
|
||||
def test_token_provider_returns_current_access_token_when_fresh():
    """A token far from expiry is handed back as-is, with no HTTP traffic."""
    from hermes_cli.auth import build_minimax_oauth_token_provider

    fresh_state = {
        "access_token": "still-fresh",
        "refresh_token": "rt",
        "portal_base_url": MINIMAX_OAUTH_GLOBAL_BASE,
        "client_id": MINIMAX_OAUTH_CLIENT_ID,
        "inference_base_url": MINIMAX_OAUTH_GLOBAL_INFERENCE,
        "expires_at": _future_iso(3600),
    }

    provide = build_minimax_oauth_token_provider()

    with patch("hermes_cli.auth.get_provider_auth_state", return_value=fresh_state):
        with patch("httpx.Client") as http_client_cls:
            result = provide()
            # The token is still valid, so no HTTP client may be created.
            http_client_cls.assert_not_called()

    assert result == "still-fresh"
|
||||
|
||||
|
||||
def test_token_provider_refreshes_when_near_expiry():
    """Inside the skew window the callable fetches and returns a new token."""
    from hermes_cli.auth import build_minimax_oauth_token_provider

    expiring_state = {
        "access_token": "about-to-die",
        "refresh_token": "rt",
        "portal_base_url": MINIMAX_OAUTH_GLOBAL_BASE,
        "client_id": MINIMAX_OAUTH_CLIENT_ID,
        "inference_base_url": MINIMAX_OAUTH_GLOBAL_INFERENCE,
        "expires_at": _future_iso(MINIMAX_OAUTH_REFRESH_SKEW_SECONDS - 1),
    }

    refresh_payload = {
        "status": "success",
        "access_token": "fresh-bearer",
        "refresh_token": "rt2",
        "expired_in": 900,
    }
    refresh_response = _make_httpx_response(200, refresh_payload)

    provide = build_minimax_oauth_token_provider()

    # Fake httpx.Client instance that also works as a context manager and
    # answers every POST with the canned refresh response.
    http_client = MagicMock()
    http_client.__enter__.return_value = http_client
    http_client.__exit__.return_value = False
    http_client.post.return_value = refresh_response

    with patch("hermes_cli.auth.get_provider_auth_state", return_value=expiring_state):
        with patch("httpx.Client", return_value=http_client):
            with patch("hermes_cli.auth._minimax_save_auth_state"):
                result = provide()

    assert result == "fresh-bearer"
|
||||
|
||||
|
||||
def test_token_provider_rereads_state_each_call():
    """Every invocation loads auth.json again, so a refresh persisted by a
    different hermes process shows up on the very next call."""
    from hermes_cli.auth import build_minimax_oauth_token_provider

    expected_tokens = (
        "first-token",
        "second-token-after-another-process-refreshed",
    )
    # One state per provider call; only the access token differs.
    successive_states = [
        {
            "access_token": access_token,
            "refresh_token": "rt",
            "portal_base_url": MINIMAX_OAUTH_GLOBAL_BASE,
            "client_id": MINIMAX_OAUTH_CLIENT_ID,
            "inference_base_url": MINIMAX_OAUTH_GLOBAL_INFERENCE,
            "expires_at": _future_iso(3600),
        }
        for access_token in expected_tokens
    ]

    provide = build_minimax_oauth_token_provider()
    with patch("hermes_cli.auth.get_provider_auth_state", side_effect=successive_states):
        first_result = provide()
        second_result = provide()

    assert first_result == "first-token"
    assert second_result == "second-token-after-another-process-refreshed"
|
||||
|
||||
|
||||
def test_token_provider_raises_not_logged_in_when_state_missing():
    """Missing auth.json state → AuthError(not_logged_in, relogin_required=True)."""
    from hermes_cli.auth import build_minimax_oauth_token_provider

    provide = build_minimax_oauth_token_provider()
    missing_state = patch("hermes_cli.auth.get_provider_auth_state", return_value=None)
    with missing_state, pytest.raises(AuthError) as raised:
        provide()

    error = raised.value
    assert error.code == "not_logged_in"
    assert error.relogin_required is True
|
||||
|
||||
|
||||
def test_token_provider_quarantines_state_on_terminal_refresh():
    """An invalid_grant refresh makes the callable raise AuthError and strip
    the dead tokens, so later calls fail fast without touching the network."""
    from hermes_cli.auth import build_minimax_oauth_token_provider

    expired_state = {
        "access_token": "expired",
        "refresh_token": "burned-rt",
        "portal_base_url": MINIMAX_OAUTH_GLOBAL_BASE,
        "client_id": MINIMAX_OAUTH_CLIENT_ID,
        "inference_base_url": MINIMAX_OAUTH_GLOBAL_INFERENCE,
        "expires_at": _past_iso(100),
    }

    # 400 response whose body is plain text rather than JSON.
    rejection = _make_httpx_response(400, text="invalid_grant")
    rejection.json.side_effect = Exception("no json")
    rejection.text = "invalid_grant"
    rejection.reason_phrase = "Bad Request"

    saved_states: list[dict] = []

    def _record_save(persisted):
        # Snapshot the state at save time; the caller keeps mutating it.
        saved_states.append(dict(persisted))

    http_client = MagicMock()
    http_client.__enter__.return_value = http_client
    http_client.__exit__.return_value = False
    http_client.post.return_value = rejection

    provide = build_minimax_oauth_token_provider()
    with patch("hermes_cli.auth.get_provider_auth_state", return_value=expired_state):
        with patch("httpx.Client", return_value=http_client):
            with patch("hermes_cli.auth._minimax_save_auth_state", side_effect=_record_save):
                with pytest.raises(AuthError) as raised:
                    provide()

    assert raised.value.relogin_required is True
    # Exactly one save happened, and it carried the token-free state.
    assert len(saved_states) == 1
    quarantined = saved_states[0]
    assert "access_token" not in quarantined
    assert "refresh_token" not in quarantined
    assert quarantined["last_auth_error"]["relogin_required"] is True
|
||||
|
||||
|
||||
def test_resolve_returns_callable_when_as_token_provider_true():
    """Opt-in path: with as_token_provider=True,
    resolve_minimax_oauth_runtime_credentials yields a callable api_key."""
    valid_state = {
        "access_token": "tok",
        "refresh_token": "rt",
        "portal_base_url": MINIMAX_OAUTH_GLOBAL_BASE,
        "client_id": MINIMAX_OAUTH_CLIENT_ID,
        "inference_base_url": MINIMAX_OAUTH_GLOBAL_INFERENCE,
        "expires_at": _future_iso(3600),
    }

    state_patch = patch("hermes_cli.auth.get_provider_auth_state", return_value=valid_state)
    with state_patch:
        resolved = resolve_minimax_oauth_runtime_credentials(as_token_provider=True)

    api_key = resolved["api_key"]
    assert callable(api_key)
    assert not isinstance(api_key, str)
    assert resolved["base_url"] == MINIMAX_OAUTH_GLOBAL_INFERENCE.rstrip("/")
|
||||
|
||||
|
||||
def test_resolve_returns_string_by_default():
    """Default stays backwards compatible: api_key is a plain string resolved once."""
    valid_state = {
        "access_token": "tok",
        "refresh_token": "rt",
        "portal_base_url": MINIMAX_OAUTH_GLOBAL_BASE,
        "client_id": MINIMAX_OAUTH_CLIENT_ID,
        "inference_base_url": MINIMAX_OAUTH_GLOBAL_INFERENCE,
        "expires_at": _future_iso(3600),
    }

    state_patch = patch("hermes_cli.auth.get_provider_auth_state", return_value=valid_state)
    with state_patch:
        resolved = resolve_minimax_oauth_runtime_credentials()

    api_key = resolved["api_key"]
    assert api_key == "tok"
    assert isinstance(api_key, str)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue