mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix(mcp-oauth): persist OAuth server metadata across process restarts (#21226)
The MCP SDK discovers OAuth server metadata (token_endpoint, etc.) on
demand and keeps it in memory only. Without disk persistence, a restart
with valid cached refresh tokens forces the SDK to fall back to the
guessed '{server_url}/token' path — which returns 404 on most real
providers (Notion, Atlassian, GitHub remote MCP, etc.) and triggers a
full browser re-authorization even though the refresh token is fine.
Add a .meta.json file next to the existing tokens/client_info files:
HERMES_HOME/mcp-tokens/<server>.json -- tokens (existing)
HERMES_HOME/mcp-tokens/<server>.client.json -- client info (existing)
HERMES_HOME/mcp-tokens/<server>.meta.json -- oauth metadata (new)
Changes:
- HermesTokenStorage.save_oauth_metadata / load_oauth_metadata / _meta_path
— disk layer for the discovered OAuthMetadata.
- HermesTokenStorage.remove() now also clears .meta.json so
'hermes mcp remove <name>' and the manager's remove() path clean up fully.
- HermesMCPOAuthProvider._initialize cold-restores from disk before the
existing pre-flight discovery runs. If disk has metadata we skip the
discovery HTTP round-trips entirely.
- HermesMCPOAuthProvider._prefetch_oauth_metadata now persists the
authorization-server metadata (ASM) as soon as it's discovered, so even
the first pre-flight run seeds disk.
- HermesMCPOAuthProvider._persist_oauth_metadata_if_changed() is called
at the end of async_auth_flow so metadata discovered via the SDK's
lazy 401-branch (not pre-flight) is also saved for next time.
Tests cover the storage roundtrip (save/load/missing/corrupt/remove) and
the manager provider path (cold-load restore, skip-when-in-memory,
persist-on-discover, noop-when-unchanged, end-to-end async_auth_flow).
Co-authored-by: nocturnum91 <50326054+nocturnum91@users.noreply.github.com>
This commit is contained in:
parent
3c439ec681
commit
c4a7992317
4 changed files with 293 additions and 1 deletions
|
|
@ -61,6 +61,7 @@ try:
|
|||
from mcp.shared.auth import (
|
||||
OAuthClientInformationFull,
|
||||
OAuthClientMetadata,
|
||||
OAuthMetadata,
|
||||
OAuthToken,
|
||||
)
|
||||
|
||||
|
|
@ -212,6 +213,7 @@ class HermesTokenStorage:
|
|||
|
||||
HERMES_HOME/mcp-tokens/<server_name>.json -- tokens
|
||||
HERMES_HOME/mcp-tokens/<server_name>.client.json -- client info
|
||||
HERMES_HOME/mcp-tokens/<server_name>.meta.json -- oauth server metadata
|
||||
"""
|
||||
|
||||
def __init__(self, server_name: str):
|
||||
|
|
@ -223,6 +225,9 @@ class HermesTokenStorage:
|
|||
def _client_info_path(self) -> Path:
|
||||
return _get_token_dir() / f"{self._server_name}.client.json"
|
||||
|
||||
def _meta_path(self) -> Path:
    """Return the path of this server's ``<server_name>.meta.json`` file.

    The file sits in the directory returned by ``_get_token_dir()``.
    """
    token_dir = _get_token_dir()
    return token_dir / f"{self._server_name}.meta.json"
|
||||
|
||||
# -- tokens ------------------------------------------------------------
|
||||
|
||||
async def get_tokens(self) -> "OAuthToken | None":
|
||||
|
|
@ -300,11 +305,33 @@ class HermesTokenStorage:
|
|||
_write_json(self._client_info_path(), client_info.model_dump(mode="json", exclude_none=True))
|
||||
logger.debug("OAuth client info saved for %s", self._server_name)
|
||||
|
||||
# -- oauth server metadata --------------------------------------------
|
||||
# The MCP SDK keeps discovered ``OAuthMetadata`` (token endpoint URL,
|
||||
# etc.) in memory only. Persisting it here lets a restarted process
|
||||
# refresh tokens without re-running metadata discovery. Without this,
|
||||
# cold-start refresh requests fall back to the SDK's guessed
|
||||
# ``{server_url}/token`` which returns 404 on most real providers and
|
||||
# forces a full browser re-authorization.
|
||||
|
||||
def save_oauth_metadata(self, metadata: "OAuthMetadata") -> None:
    """Write *metadata* to this server's ``.meta.json`` file.

    The model is dumped in JSON mode with ``None``-valued fields omitted,
    then handed to ``_write_json``.
    """
    target = self._meta_path()
    payload = metadata.model_dump(exclude_none=True, mode="json")
    _write_json(target, payload)
    logger.debug("OAuth metadata saved for %s", self._server_name)
|
||||
|
||||
def load_oauth_metadata(self) -> "OAuthMetadata | None":
    """Load the persisted OAuth server metadata for this server.

    Returns ``None`` when ``_read_json`` yields nothing for the metadata
    path, or when the stored payload fails validation (in which case a
    warning is logged and the file content is ignored).
    """
    raw = _read_json(self._meta_path())
    if raw is not None:
        try:
            return OAuthMetadata.model_validate(raw)
        except (ValueError, TypeError, KeyError) as err:
            logger.warning("Corrupt OAuth metadata at %s -- ignoring: %s", self._meta_path(), err)
    return None
|
||||
|
||||
# -- cleanup -----------------------------------------------------------
|
||||
|
||||
def remove(self) -> None:
    """Delete all stored OAuth state for this server.

    Removes the tokens file, the client-info file and the OAuth server
    metadata file. Files that do not exist are skipped silently.
    """
    stored_files = (
        self._tokens_path(),
        self._client_info_path(),
        self._meta_path(),
    )
    for path in stored_files:
        # missing_ok: a server may never have written every file.
        path.unlink(missing_ok=True)
|
||||
|
||||
def has_cached_tokens(self) -> bool:
|
||||
|
|
|
|||
|
|
@ -148,6 +148,27 @@ def _make_hermes_provider_class() -> Optional[type]:
|
|||
if tokens is not None and tokens.expires_in is not None:
|
||||
self.context.update_token_expiry(tokens)
|
||||
|
||||
# Cold-load: restore OAuth server metadata from disk before any
|
||||
# refresh attempt. Without this, a restarted process with cached
|
||||
# tokens but no in-memory metadata would fall back to the SDK's
|
||||
# guessed ``{server_url}/token`` path (returns 404 on most real
|
||||
# providers) and require a full browser re-authorization.
|
||||
storage = self.context.storage
|
||||
from tools.mcp_oauth import HermesTokenStorage
|
||||
if (
|
||||
isinstance(storage, HermesTokenStorage)
|
||||
and self.context.oauth_metadata is None
|
||||
):
|
||||
meta = storage.load_oauth_metadata()
|
||||
if meta is not None:
|
||||
self.context.oauth_metadata = meta
|
||||
logger.debug(
|
||||
"MCP OAuth '%s': restored metadata from disk "
|
||||
"(token_endpoint=%s)",
|
||||
self._hermes_server_name,
|
||||
meta.token_endpoint,
|
||||
)
|
||||
|
||||
# Pre-flight OAuth AS discovery so ``_refresh_token`` has a
|
||||
# correct ``token_endpoint`` before the first refresh attempt.
|
||||
# Only runs when we have tokens on cold-load but no cached
|
||||
|
|
@ -229,6 +250,12 @@ def _make_hermes_provider_class() -> Optional[type]:
|
|||
break
|
||||
if asm:
|
||||
self.context.oauth_metadata = asm
|
||||
# Persist immediately so a subsequent cold-load can
|
||||
# skip discovery entirely.
|
||||
storage = self.context.storage
|
||||
from tools.mcp_oauth import HermesTokenStorage
|
||||
if isinstance(storage, HermesTokenStorage):
|
||||
storage.save_oauth_metadata(asm)
|
||||
logger.debug(
|
||||
"MCP OAuth '%s': pre-flight ASM discovered "
|
||||
"token_endpoint=%s",
|
||||
|
|
@ -236,6 +263,27 @@ def _make_hermes_provider_class() -> Optional[type]:
|
|||
)
|
||||
break
|
||||
|
||||
def _persist_oauth_metadata_if_changed(self) -> None:
    """Persist discovered OAuth metadata for future process restarts.

    Called after the SDK's normal 401-branch auth flow completes so
    metadata discovered via the lazy path (not pre-flight) is also
    saved. No-op when there is no in-memory metadata, when the storage
    is not a ``HermesTokenStorage``, or when the serialized metadata on
    disk already matches the in-memory copy.

    A failed disk write is logged and swallowed: persistence is an
    optimisation for the next cold start and must not fail the auth
    flow that has just completed.
    """
    meta = self.context.oauth_metadata
    if meta is None:
        return
    storage = self.context.storage
    from tools.mcp_oauth import HermesTokenStorage

    if not isinstance(storage, HermesTokenStorage):
        return

    existing = storage.load_oauth_metadata()
    # Compare the full serialized form (the same shape that gets written
    # to disk) rather than only token_endpoint, so a change to any other
    # field is persisted as well.
    if existing is not None and (
        existing.model_dump(exclude_none=True, mode="json")
        == meta.model_dump(exclude_none=True, mode="json")
    ):
        return

    try:
        storage.save_oauth_metadata(meta)
    except OSError as exc:
        logger.warning(
            "MCP OAuth '%s': failed to persist metadata: %s",
            self._hermes_server_name,
            exc,
        )
|
||||
|
||||
async def async_auth_flow(self, request): # type: ignore[override]
|
||||
# Pre-flow hook: ask the manager to refresh from disk if needed.
|
||||
# Any failure here is non-fatal — we just log and proceed with
|
||||
|
|
@ -271,6 +319,9 @@ def _make_hermes_provider_class() -> Optional[type]:
|
|||
incoming = yield outgoing
|
||||
outgoing = await inner.asend(incoming)
|
||||
except StopAsyncIteration:
|
||||
# Persist any metadata the SDK discovered lazily during the
|
||||
# 401 branch so a subsequent cold-load skips discovery.
|
||||
self._persist_oauth_metadata_if_changed()
|
||||
return
|
||||
|
||||
return HermesMCPOAuthProvider
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue