fix(mcp-oauth): persist OAuth server metadata across process restarts (#21226)

The MCP SDK discovers OAuth server metadata (token_endpoint, etc.) on
demand and keeps it in memory only. Without disk persistence, a restart
with valid cached refresh tokens forces the SDK to fall back to the
guessed '{server_url}/token' path — which returns 404 on most real
providers (Notion, Atlassian, GitHub remote MCP, etc.) and triggers a
full browser re-authorization even though the refresh token is fine.

Add a .meta.json file next to the existing tokens/client_info files:

  HERMES_HOME/mcp-tokens/<server>.json        -- tokens (existing)
  HERMES_HOME/mcp-tokens/<server>.client.json -- client info (existing)
  HERMES_HOME/mcp-tokens/<server>.meta.json   -- oauth metadata (new)

Changes:
- HermesTokenStorage.save_oauth_metadata / load_oauth_metadata / _meta_path
  — disk layer for the discovered OAuthMetadata.
- HermesTokenStorage.remove() now also clears .meta.json so
  'hermes mcp remove <name>' and the manager's remove() path clean up fully.
- HermesMCPOAuthProvider._initialize cold-restores from disk before the
  existing pre-flight discovery runs. If disk has metadata we skip the
  discovery HTTP round-trips entirely.
- HermesMCPOAuthProvider._prefetch_oauth_metadata now persists the
  authorization server metadata (ASM) as soon as it's discovered, so even
  the first pre-flight run seeds disk.
- HermesMCPOAuthProvider._persist_oauth_metadata_if_changed() is called
  at the end of async_auth_flow so metadata discovered via the SDK's
  lazy 401-branch (not pre-flight) is also saved for next time.

Tests cover the storage roundtrip (save/load/missing/corrupt/remove) and
the manager provider path (cold-load restore, skip-when-in-memory,
persist-on-discover, noop-when-unchanged, end-to-end async_auth_flow).

Co-authored-by: nocturnum91 <50326054+nocturnum91@users.noreply.github.com>
This commit is contained in:
Teknium 2026-05-07 05:35:33 -07:00 committed by GitHub
parent 3c439ec681
commit c4a7992317
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 293 additions and 1 deletions

View file

@ -61,6 +61,7 @@ try:
from mcp.shared.auth import (
OAuthClientInformationFull,
OAuthClientMetadata,
OAuthMetadata,
OAuthToken,
)
@ -212,6 +213,7 @@ class HermesTokenStorage:
HERMES_HOME/mcp-tokens/<server_name>.json -- tokens
HERMES_HOME/mcp-tokens/<server_name>.client.json -- client info
HERMES_HOME/mcp-tokens/<server_name>.meta.json -- oauth server metadata
"""
def __init__(self, server_name: str):
@ -223,6 +225,9 @@ class HermesTokenStorage:
def _client_info_path(self) -> Path:
    """Return the location of this server's ``.client.json`` file in the token directory."""
    token_dir = _get_token_dir()
    return token_dir / f"{self._server_name}.client.json"
def _meta_path(self) -> Path:
    """Return the location of this server's ``.meta.json`` file in the token directory."""
    token_dir = _get_token_dir()
    return token_dir / f"{self._server_name}.meta.json"
# -- tokens ------------------------------------------------------------
async def get_tokens(self) -> "OAuthToken | None":
@ -300,11 +305,33 @@ class HermesTokenStorage:
_write_json(self._client_info_path(), client_info.model_dump(mode="json", exclude_none=True))
logger.debug("OAuth client info saved for %s", self._server_name)
# -- oauth server metadata --------------------------------------------
# The MCP SDK keeps discovered ``OAuthMetadata`` (token endpoint URL,
# etc.) in memory only. Persisting it here lets a restarted process
# refresh tokens without re-running metadata discovery. Without this,
# cold-start refresh requests fall back to the SDK's guessed
# ``{server_url}/token`` which returns 404 on most real providers and
# forces a full browser re-authorization.
def save_oauth_metadata(self, metadata: "OAuthMetadata") -> None:
    """Write *metadata* to this server's ``.meta.json`` file.

    The model is dumped in JSON mode with ``None``-valued fields omitted,
    and a debug message is logged once the write call returns.
    """
    target = self._meta_path()
    payload = metadata.model_dump(mode="json", exclude_none=True)
    _write_json(target, payload)
    logger.debug("OAuth metadata saved for %s", self._server_name)
def load_oauth_metadata(self) -> "OAuthMetadata | None":
    """Return the OAuth server metadata read from the ``.meta.json`` file.

    Returns ``None`` when ``_read_json`` yields nothing for the file, or
    when the stored payload fails model validation; the validation
    failure is logged as a warning and otherwise ignored.
    """
    raw = _read_json(self._meta_path())
    if raw is not None:
        try:
            return OAuthMetadata.model_validate(raw)
        except (ValueError, TypeError, KeyError) as exc:
            logger.warning("Corrupt OAuth metadata at %s -- ignoring: %s", self._meta_path(), exc)
    return None
# -- cleanup -----------------------------------------------------------
def remove(self) -> None:
    """Delete all stored OAuth state for this server.

    Unlinks the tokens file, the client-info file and the OAuth server
    metadata file. Files that do not exist are skipped silently, so the
    call is safe to repeat.
    """
    # A single loop over all three paths: the metadata file is cleared
    # together with tokens and client info so no OAuth state is left behind.
    for path in (self._tokens_path(), self._client_info_path(), self._meta_path()):
        path.unlink(missing_ok=True)
def has_cached_tokens(self) -> bool: