Switch to a JWT for inference against Nous, falling back to the legacy opaque token on failure.

This commit is contained in:
Robin Fernandes 2026-05-17 19:34:44 +10:00 committed by Teknium
parent c905562623
commit 89a3d038cf
10 changed files with 780 additions and 45 deletions

View file

@@ -875,10 +875,9 @@ def _resolve_explicit_runtime(
explicit_base_url
or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
)
# Only use agent_key for inference — access_token is an OAuth token for the
# portal API (minting keys, refreshing tokens), not for the inference API.
# Falling back to access_token sends an OAuth bearer token to the inference
# endpoint, which returns 404 because it is not a valid inference credential.
# Only use the agent_key compatibility field for inference. It may be
# either a NAS invoke JWT or a legacy opaque session key; raw OAuth
# access_token fallback is handled by resolve_nous_runtime_credentials().
api_key = explicit_api_key or str(state.get("agent_key") or "").strip()
expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
if not api_key:
@@ -1069,17 +1068,19 @@ def resolve_runtime_provider(
getattr(entry, "runtime_api_key", None)
or getattr(entry, "access_token", "")
)
# For Nous, the pool entry's runtime_api_key is the agent_key — a
# short-lived inference credential (~30 min TTL). The pool doesn't
# For Nous, the pool entry's runtime_api_key is the agent_key
# compatibility field: either an invoke JWT or legacy opaque key.
# The pool doesn't
# refresh it during selection (that would trigger network calls in
# non-runtime contexts like `hermes auth list`). If the key is
# expired, clear pool_api_key so we fall through to
# resolve_nous_runtime_credentials() which handles refresh + mint.
# resolve_nous_runtime_credentials() which handles refresh + fallback.
if provider == "nous" and entry is not None and pool_api_key:
min_ttl = max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
nous_state = {
"agent_key": getattr(entry, "agent_key", None),
"agent_key_expires_at": getattr(entry, "agent_key_expires_at", None),
"scope": getattr(entry, "scope", None),
}
if not _agent_key_is_usable(nous_state, min_ttl):
logger.debug("Nous pool entry agent_key expired/missing, falling through to runtime resolution")