From b17e5c101dff559dc5365028fd3bbcac6eae55c2 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Mon, 23 Mar 2026 22:37:13 +0530 Subject: [PATCH] Add pooled same-provider credential fallback --- agent/anthropic_adapter.py | 170 +++-- agent/auxiliary_client.py | 120 ++- agent/credential_pool.py | 456 +++++++++++ cli.py | 4 + ...-multi-credential-oauth-fallback-design.md | 718 ++++++++++++++++++ gateway/run.py | 1 + hermes_cli/auth.py | 393 +++++++--- hermes_cli/auth_commands.py | 238 ++++++ hermes_cli/main.py | 39 +- hermes_cli/runtime_provider.py | 77 +- run_agent.py | 87 +++ tests/agent/test_auxiliary_client.py | 80 ++ tests/test_auth_commands.py | 268 +++++++ tests/test_credential_pool.py | 297 ++++++++ tests/test_run_agent.py | 56 ++ tests/test_runtime_provider_resolution.py | 49 ++ tests/tools/test_delegate.py | 9 +- tests/tools/test_transcription.py | 5 + 18 files changed, 2872 insertions(+), 195 deletions(-) create mode 100644 agent/credential_pool.py create mode 100644 docs/plans/2026-03-23-multi-credential-oauth-fallback-design.md create mode 100644 hermes_cli/auth_commands.py create mode 100644 tests/test_auth_commands.py create mode 100644 tests/test_credential_pool.py diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index fc5c460d6d8..02faae075a2 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -201,60 +201,75 @@ def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool: return now_ms < (expires_at - 60_000) -def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]: - """Attempt to refresh an expired Claude Code OAuth token. - - Uses the same token endpoint and client_id as Claude Code / OpenCode. - Only works for credentials that have a refresh token (from claude /login - or claude setup-token with OAuth flow). - - Returns the new access token, or None if refresh fails. 
- """ +def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]: + """Refresh an Anthropic OAuth token without mutating local credential files.""" + import time import urllib.parse import urllib.request - refresh_token = creds.get("refreshToken", "") if not refresh_token: - logger.debug("No refresh token available — cannot refresh") - return None + raise ValueError("refresh_token is required") - # Client ID used by Claude Code's OAuth flow - CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e" - - data = urllib.parse.urlencode({ - "grant_type": "refresh_token", - "refresh_token": refresh_token, - "client_id": CLIENT_ID, - }).encode() + client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e" + if use_json: + data = json.dumps({ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": client_id, + }).encode() + content_type = "application/json" + else: + data = urllib.parse.urlencode({ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": client_id, + }).encode() + content_type = "application/x-www-form-urlencoded" req = urllib.request.Request( "https://console.anthropic.com/v1/oauth/token", data=data, headers={ - "Content-Type": "application/x-www-form-urlencoded", + "Content-Type": content_type, "User-Agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)", }, method="POST", ) - try: - with urllib.request.urlopen(req, timeout=10) as resp: - result = json.loads(resp.read().decode()) - new_access = result.get("access_token", "") - new_refresh = result.get("refresh_token", refresh_token) - expires_in = result.get("expires_in", 3600) # seconds + with urllib.request.urlopen(req, timeout=10) as resp: + result = json.loads(resp.read().decode()) - if new_access: - import time - new_expires_ms = int(time.time() * 1000) + (expires_in * 1000) - # Write refreshed credentials back to ~/.claude/.credentials.json - _write_claude_code_credentials(new_access, new_refresh, new_expires_ms) - 
logger.debug("Successfully refreshed Claude Code OAuth token") - return new_access + access_token = result.get("access_token", "") + if not access_token: + raise ValueError("Anthropic refresh response was missing access_token") + next_refresh = result.get("refresh_token", refresh_token) + expires_in = result.get("expires_in", 3600) + return { + "access_token": access_token, + "refresh_token": next_refresh, + "expires_at_ms": int(time.time() * 1000) + (expires_in * 1000), + } + + +def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]: + """Attempt to refresh an expired Claude Code OAuth token.""" + refresh_token = creds.get("refreshToken", "") + if not refresh_token: + logger.debug("No refresh token available — cannot refresh") + return None + + try: + refreshed = refresh_anthropic_oauth_pure(refresh_token, use_json=False) + _write_claude_code_credentials( + refreshed["access_token"], + refreshed["refresh_token"], + refreshed["expires_at_ms"], + ) + logger.debug("Successfully refreshed Claude Code OAuth token") + return refreshed["access_token"] except Exception as e: logger.debug("Failed to refresh Claude Code token: %s", e) - - return None + return None def _write_claude_code_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None: @@ -466,14 +481,8 @@ def _generate_pkce() -> tuple: return verifier, challenge -def run_hermes_oauth_login() -> Optional[str]: - """Run Hermes-native OAuth PKCE flow for Claude Pro/Max subscription. - - Opens a browser to claude.ai for authorization, prompts for the code, - exchanges it for tokens, and stores them in ~/.hermes/.anthropic_oauth.json. - - Returns the access token on success, None on failure. 
- """ +def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]: + """Run Hermes-native OAuth PKCE flow and return credential state.""" import time import webbrowser @@ -564,10 +573,32 @@ def run_hermes_oauth_login() -> Optional[str]: print("No access token in response.") return None - # Store credentials expires_at_ms = int(time.time() * 1000) + (expires_in * 1000) - _save_hermes_oauth_credentials(access_token, refresh_token, expires_at_ms) + return { + "access_token": access_token, + "refresh_token": refresh_token, + "expires_at_ms": expires_at_ms, + } + +def run_hermes_oauth_login() -> Optional[str]: + """Run Hermes-native OAuth PKCE flow for Claude Pro/Max subscription. + + Opens a browser to claude.ai for authorization, prompts for the code, + exchanges it for tokens, and stores them in ~/.hermes/.anthropic_oauth.json. + + Returns the access token on success, None on failure. + """ + result = run_hermes_oauth_login_pure() + if not result: + return None + + access_token = result["access_token"] + refresh_token = result["refresh_token"] + expires_at_ms = result["expires_at_ms"] + + # Store credentials + _save_hermes_oauth_credentials(access_token, refresh_token, expires_at_ms) # Also write to Claude Code's credential file for backward compat _write_claude_code_credentials(access_token, refresh_token, expires_at_ms) @@ -607,44 +638,27 @@ def refresh_hermes_oauth_token() -> Optional[str]: Returns the new access token, or None if refresh fails. 
""" - import time - import urllib.request - creds = read_hermes_oauth_credentials() if not creds or not creds.get("refreshToken"): return None try: - data = json.dumps({ - "grant_type": "refresh_token", - "refresh_token": creds["refreshToken"], - "client_id": _OAUTH_CLIENT_ID, - }).encode() - - req = urllib.request.Request( - _OAUTH_TOKEN_URL, - data=data, - headers={ - "Content-Type": "application/json", - "User-Agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)", - }, - method="POST", + refreshed = refresh_anthropic_oauth_pure( + creds["refreshToken"], + use_json=True, ) - - with urllib.request.urlopen(req, timeout=10) as resp: - result = json.loads(resp.read().decode()) - - new_access = result.get("access_token", "") - new_refresh = result.get("refresh_token", creds["refreshToken"]) - expires_in = result.get("expires_in", 3600) - - if new_access: - new_expires_ms = int(time.time() * 1000) + (expires_in * 1000) - _save_hermes_oauth_credentials(new_access, new_refresh, new_expires_ms) - # Also update Claude Code's credential file - _write_claude_code_credentials(new_access, new_refresh, new_expires_ms) - logger.debug("Successfully refreshed Hermes OAuth token") - return new_access + _save_hermes_oauth_credentials( + refreshed["access_token"], + refreshed["refresh_token"], + refreshed["expires_at_ms"], + ) + _write_claude_code_credentials( + refreshed["access_token"], + refreshed["refresh_token"], + refreshed["expires_at_ms"], + ) + logger.debug("Successfully refreshed Hermes OAuth token") + return refreshed["access_token"] except Exception as e: logger.debug("Failed to refresh Hermes OAuth token: %s", e) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 5d147e430bd..4ca5999ae59 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -47,6 +47,7 @@ from typing import Any, Dict, List, Optional, Tuple from openai import OpenAI +from agent.credential_pool import load_pool from hermes_cli.config import get_hermes_home 
from hermes_constants import OPENROUTER_BASE_URL @@ -96,6 +97,45 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex" _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" +def _select_pool_entry(provider: str) -> Tuple[bool, Optional[Any]]: + """Return (pool_exists_for_provider, selected_entry).""" + try: + pool = load_pool(provider) + except Exception as exc: + logger.debug("Auxiliary client: could not load pool for %s: %s", provider, exc) + return False, None + if not pool or not pool.has_credentials(): + return False, None + try: + return True, pool.select() + except Exception as exc: + logger.debug("Auxiliary client: could not select pool entry for %s: %s", provider, exc) + return True, None + + +def _pool_runtime_api_key(entry: Any) -> str: + if entry is None: + return "" + return str( + getattr(entry, "runtime_api_key", None) + or getattr(entry, "agent_key", None) + or getattr(entry, "access_token", "") + or "" + ).strip() + + +def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str: + if entry is None: + return str(fallback or "").strip().rstrip("/") + return str( + getattr(entry, "runtime_base_url", None) + or getattr(entry, "inference_base_url", None) + or getattr(entry, "base_url", None) + or fallback + or "" + ).strip().rstrip("/") + + # ── Codex Responses → chat.completions adapter ───────────────────────────── # All auxiliary consumers call client.chat.completions.create(**kwargs) and # read response.choices[0].message.content. This adapter translates those @@ -439,6 +479,21 @@ def _read_nous_auth() -> Optional[dict]: Returns the provider state dict if Nous is active with tokens, otherwise None. 
""" + pool_present, entry = _select_pool_entry("nous") + if pool_present: + if entry is None: + return None + return { + "access_token": getattr(entry, "access_token", ""), + "refresh_token": getattr(entry, "refresh_token", None), + "agent_key": getattr(entry, "agent_key", None), + "inference_base_url": _pool_runtime_base_url(entry, _NOUS_DEFAULT_BASE_URL), + "portal_base_url": getattr(entry, "portal_base_url", None), + "client_id": getattr(entry, "client_id", None), + "scope": getattr(entry, "scope", None), + "token_type": getattr(entry, "token_type", "Bearer"), + } + try: if not _AUTH_JSON_PATH.is_file(): return None @@ -467,6 +522,11 @@ def _nous_base_url() -> str: def _read_codex_access_token() -> Optional[str]: """Read a valid, non-expired Codex OAuth access token from Hermes auth store.""" + pool_present, entry = _select_pool_entry("openai-codex") + if pool_present: + token = _pool_runtime_api_key(entry) + return token or None + try: from hermes_cli.auth import _read_codex_tokens data = _read_codex_tokens() @@ -513,6 +573,24 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if provider_id == "anthropic": return _try_anthropic() + pool_present, entry = _select_pool_entry(provider_id) + if pool_present: + api_key = _pool_runtime_api_key(entry) + if not api_key: + continue + + base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url + model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default") + logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model) + extra = {} + if "api.kimi.com" in base_url.lower(): + extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"} + elif "api.githubcopilot.com" in base_url.lower(): + from hermes_cli.models import copilot_default_headers + + extra["default_headers"] = copilot_default_headers() + return OpenAI(api_key=api_key, base_url=base_url, **extra), model + creds = resolve_api_key_provider_credentials(provider_id) api_key = 
str(creds.get("api_key", "")).strip() if not api_key: @@ -562,6 +640,16 @@ def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]: def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]: + pool_present, entry = _select_pool_entry("openrouter") + if pool_present: + or_key = _pool_runtime_api_key(entry) + if not or_key: + return None, None + base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL + logger.debug("Auxiliary client: OpenRouter via pool") + return OpenAI(api_key=or_key, base_url=base_url, + default_headers=_OR_HEADERS), _OPENROUTER_MODEL + or_key = os.getenv("OPENROUTER_API_KEY") if not or_key: return None, None @@ -578,7 +666,10 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]: auxiliary_is_nous = True logger.debug("Auxiliary client: Nous Portal") return ( - OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()), + OpenAI( + api_key=_nous_api_key(nous), + base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"), + ), _NOUS_MODEL, ) @@ -654,11 +745,19 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: def _try_codex() -> Tuple[Optional[Any], Optional[str]]: - codex_token = _read_codex_access_token() - if not codex_token: - return None, None + pool_present, entry = _select_pool_entry("openai-codex") + if pool_present: + codex_token = _pool_runtime_api_key(entry) + if not codex_token: + return None, None + base_url = _pool_runtime_base_url(entry, _CODEX_AUX_BASE_URL) or _CODEX_AUX_BASE_URL + else: + codex_token = _read_codex_access_token() + if not codex_token: + return None, None + base_url = _CODEX_AUX_BASE_URL logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) - real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + real_client = OpenAI(api_key=codex_token, base_url=base_url) return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL @@ -668,14 +767,21 @@ def 
_try_anthropic() -> Tuple[Optional[Any], Optional[str]]: except ImportError: return None, None - token = resolve_anthropic_token() + pool_present, entry = _select_pool_entry("anthropic") + if pool_present: + if entry is None: + return None, None + token = _pool_runtime_api_key(entry) + else: + entry = None + token = resolve_anthropic_token() if not token: return None, None # Allow base URL override from config.yaml model.base_url, but only # when the configured provider is anthropic — otherwise a non-Anthropic # base_url (e.g. Codex endpoint) would leak into Anthropic requests. - base_url = _ANTHROPIC_DEFAULT_BASE_URL + base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL try: from hermes_cli.config import load_config cfg = load_config() diff --git a/agent/credential_pool.py b/agent/credential_pool.py new file mode 100644 index 00000000000..9256eab5b77 --- /dev/null +++ b/agent/credential_pool.py @@ -0,0 +1,456 @@ +"""Persistent multi-credential pool for same-provider failover.""" + +from __future__ import annotations + +import time +import uuid +import os +from dataclasses import dataclass, fields +from typing import Any, Dict, List, Optional + +from hermes_constants import OPENROUTER_BASE_URL +import hermes_cli.auth as auth_mod +from hermes_cli.auth import ( + ACCESS_TOKEN_REFRESH_SKEW_SECONDS, + CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, + DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, + PROVIDER_REGISTRY, + _agent_key_is_usable, + _codex_access_token_is_expiring, + _decode_jwt_claims, + _is_expiring, + _load_auth_store, + _load_provider_state, + read_credential_pool, + write_credential_pool, +) + +EXHAUSTED_TTL_SECONDS = 24 * 60 * 60 + + +@dataclass +class PooledCredential: + provider: str + id: str + label: str + auth_type: str + priority: int + source: str + access_token: str + refresh_token: Optional[str] = None + last_status: Optional[str] = None + last_status_at: Optional[float] = None + last_error_code: 
Optional[int] = None + base_url: Optional[str] = None + expires_at: Optional[str] = None + expires_at_ms: Optional[int] = None + last_refresh: Optional[str] = None + token_type: Optional[str] = None + scope: Optional[str] = None + client_id: Optional[str] = None + portal_base_url: Optional[str] = None + inference_base_url: Optional[str] = None + obtained_at: Optional[str] = None + expires_in: Optional[int] = None + agent_key: Optional[str] = None + agent_key_id: Optional[str] = None + agent_key_expires_at: Optional[str] = None + agent_key_expires_in: Optional[int] = None + agent_key_reused: Optional[bool] = None + agent_key_obtained_at: Optional[str] = None + tls: Optional[Dict[str, Any]] = None + + @classmethod + def from_dict(cls, provider: str, payload: Dict[str, Any]) -> "PooledCredential": + allowed = {f.name for f in fields(cls) if f.name != "provider"} + data = {k: payload.get(k) for k in allowed if k in payload} + data.setdefault("id", uuid.uuid4().hex[:6]) + data.setdefault("label", payload.get("source", provider)) + data.setdefault("auth_type", "api_key") + data.setdefault("priority", 0) + data.setdefault("source", "manual") + data.setdefault("access_token", "") + return cls(provider=provider, **data) + + def to_dict(self) -> Dict[str, Any]: + result: Dict[str, Any] = {} + for field_def in fields(self): + if field_def.name == "provider": + continue + value = getattr(self, field_def.name) + if value is not None: + result[field_def.name] = value + for key in ("last_status", "last_status_at", "last_error_code"): + result.setdefault(key, getattr(self, key)) + return result + + @property + def runtime_api_key(self) -> str: + if self.provider == "nous": + return str(self.agent_key or self.access_token or "") + return str(self.access_token or "") + + @property + def runtime_base_url(self) -> Optional[str]: + if self.provider == "nous": + return self.inference_base_url or self.base_url + return self.base_url + + +def _label_from_token(token: str, fallback: str) 
-> str: + claims = _decode_jwt_claims(token) + for key in ("email", "preferred_username", "upn"): + value = claims.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + return fallback + + +def _next_priority(entries: List[PooledCredential]) -> int: + return max((entry.priority for entry in entries), default=-1) + 1 + + +class CredentialPool: + def __init__(self, provider: str, entries: List[PooledCredential]): + self.provider = provider + self._entries = sorted(entries, key=lambda entry: entry.priority) + self._current_id: Optional[str] = None + + def has_credentials(self) -> bool: + return bool(self._entries) + + def entries(self) -> List[PooledCredential]: + return list(sorted(self._entries, key=lambda entry: entry.priority)) + + def current(self) -> Optional[PooledCredential]: + if not self._current_id: + return None + return next((entry for entry in self._entries if entry.id == self._current_id), None) + + def _persist(self) -> None: + write_credential_pool( + self.provider, + [entry.to_dict() for entry in sorted(self._entries, key=lambda item: item.priority)], + ) + + def _mark_exhausted(self, entry: PooledCredential, status_code: Optional[int]) -> None: + entry.last_status = "exhausted" + entry.last_status_at = time.time() + entry.last_error_code = status_code + self._persist() + + def _refresh_entry(self, entry: PooledCredential, *, force: bool) -> Optional[PooledCredential]: + if entry.auth_type != "oauth" or not entry.refresh_token: + if force: + self._mark_exhausted(entry, None) + return None + + try: + if self.provider == "anthropic": + from agent.anthropic_adapter import refresh_anthropic_oauth_pure + + refreshed = refresh_anthropic_oauth_pure( + entry.refresh_token, + use_json=entry.source.endswith("hermes_pkce"), + ) + entry.access_token = refreshed["access_token"] + entry.refresh_token = refreshed["refresh_token"] + entry.expires_at_ms = refreshed["expires_at_ms"] + elif self.provider == "openai-codex": + refreshed = 
auth_mod.refresh_codex_oauth_pure( + entry.access_token, + entry.refresh_token, + ) + entry.access_token = refreshed["access_token"] + entry.refresh_token = refreshed["refresh_token"] + entry.last_refresh = refreshed.get("last_refresh") + elif self.provider == "nous": + refreshed = auth_mod.refresh_nous_oauth_pure( + entry.access_token, + entry.refresh_token, + entry.client_id or "hermes-cli", + entry.portal_base_url or "https://portal.nousresearch.com", + entry.inference_base_url or "https://inference-api.nousresearch.com/v1", + token_type=entry.token_type or "Bearer", + scope=entry.scope or "", + obtained_at=entry.obtained_at, + expires_at=entry.expires_at, + agent_key=entry.agent_key, + agent_key_expires_at=entry.agent_key_expires_at, + min_key_ttl_seconds=DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, + force_refresh=force, + force_mint=force, + ) + for key, value in refreshed.items(): + if hasattr(entry, key): + setattr(entry, key, value) + else: + return entry + except Exception: + self._mark_exhausted(entry, None) + return None + + entry.last_status = "ok" + entry.last_status_at = None + entry.last_error_code = None + self._persist() + return entry + + def _entry_needs_refresh(self, entry: PooledCredential) -> bool: + if entry.auth_type != "oauth": + return False + if self.provider == "anthropic": + if entry.expires_at_ms is None: + return False + return int(entry.expires_at_ms) <= int(time.time() * 1000) + 120_000 + if self.provider == "openai-codex": + return _codex_access_token_is_expiring( + entry.access_token, + CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, + ) + if self.provider == "nous": + if _is_expiring(entry.expires_at, ACCESS_TOKEN_REFRESH_SKEW_SECONDS): + return True + return not _agent_key_is_usable( + { + "agent_key": entry.agent_key, + "agent_key_expires_at": entry.agent_key_expires_at, + }, + DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, + ) + return False + + def select(self) -> Optional[PooledCredential]: + now = time.time() + for entry in sorted(self._entries, 
key=lambda item: item.priority): + if entry.last_status == "exhausted": + if entry.last_status_at and now - entry.last_status_at < EXHAUSTED_TTL_SECONDS: + continue + entry.last_status = "ok" + entry.last_status_at = None + entry.last_error_code = None + self._persist() + if self._entry_needs_refresh(entry): + refreshed = self._refresh_entry(entry, force=False) + if refreshed is None: + continue + entry = refreshed + self._current_id = entry.id + return entry + self._current_id = None + return None + + def mark_exhausted_and_rotate(self, *, status_code: Optional[int]) -> Optional[PooledCredential]: + entry = self.current() or self.select() + if entry is None: + return None + self._mark_exhausted(entry, status_code) + self._current_id = None + return self.select() + + def try_refresh_current(self) -> Optional[PooledCredential]: + entry = self.current() + if entry is None: + return None + refreshed = self._refresh_entry(entry, force=True) + if refreshed is not None: + self._current_id = refreshed.id + return refreshed + + def reset_statuses(self) -> int: + count = 0 + for entry in self._entries: + if entry.last_status or entry.last_status_at or entry.last_error_code: + entry.last_status = None + entry.last_status_at = None + entry.last_error_code = None + count += 1 + if count: + self._persist() + return count + + def remove_index(self, index: int) -> Optional[PooledCredential]: + ordered = sorted(self._entries, key=lambda item: item.priority) + if index < 1 or index > len(ordered): + return None + removed = ordered.pop(index - 1) + for new_priority, entry in enumerate(ordered): + entry.priority = new_priority + self._entries = ordered + self._persist() + if self._current_id == removed.id: + self._current_id = None + return removed + + def add_entry(self, entry: PooledCredential) -> PooledCredential: + entry.priority = _next_priority(self._entries) + self._entries.append(entry) + self._persist() + return entry + + +def _upsert_entry(entries: List[PooledCredential], 
provider: str, source: str, payload: Dict[str, Any]) -> bool: + existing = next((entry for entry in entries if entry.source == source), None) + if existing is None: + payload.setdefault("id", uuid.uuid4().hex[:6]) + payload.setdefault("priority", _next_priority(entries)) + payload.setdefault("label", payload.get("label") or source) + entries.append(PooledCredential.from_dict(provider, payload)) + return True + + changed = False + for key, value in payload.items(): + if key in {"id", "priority"} or value is None: + continue + if key == "label" and existing.label: + continue + if hasattr(existing, key) and getattr(existing, key) != value: + setattr(existing, key, value) + changed = True + return changed + + +def _seed_from_env(provider: str, entries: List[PooledCredential]) -> bool: + changed = False + if provider == "openrouter": + token = os.getenv("OPENROUTER_API_KEY", "").strip() + if token: + changed |= _upsert_entry( + entries, + provider, + "env:OPENROUTER_API_KEY", + { + "source": "env:OPENROUTER_API_KEY", + "auth_type": "api_key", + "access_token": token, + "base_url": OPENROUTER_BASE_URL, + "label": "OPENROUTER_API_KEY", + }, + ) + return changed + + pconfig = PROVIDER_REGISTRY.get(provider) + if not pconfig or pconfig.auth_type != "api_key": + return changed + + env_url = "" + if pconfig.base_url_env_var: + env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/") + + for env_var in pconfig.api_key_env_vars: + token = os.getenv(env_var, "").strip() + if not token: + continue + auth_type = "oauth" if provider == "anthropic" and not token.startswith("sk-ant-api") else "api_key" + base_url = env_url or pconfig.inference_base_url + changed |= _upsert_entry( + entries, + provider, + f"env:{env_var}", + { + "source": f"env:{env_var}", + "auth_type": auth_type, + "access_token": token, + "base_url": base_url, + "label": env_var, + }, + ) + return changed + + +def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> bool: + changed 
= False + auth_store = _load_auth_store() + + if provider == "anthropic": + from agent.anthropic_adapter import read_claude_code_credentials, read_hermes_oauth_credentials + + hermes_creds = read_hermes_oauth_credentials() + if hermes_creds and hermes_creds.get("accessToken"): + changed |= _upsert_entry( + entries, + provider, + "hermes_pkce", + { + "source": "hermes_pkce", + "auth_type": "oauth", + "access_token": hermes_creds.get("accessToken", ""), + "refresh_token": hermes_creds.get("refreshToken"), + "expires_at_ms": hermes_creds.get("expiresAt"), + "label": _label_from_token(hermes_creds.get("accessToken", ""), "hermes_pkce"), + }, + ) + claude_creds = read_claude_code_credentials() + if claude_creds and claude_creds.get("accessToken"): + changed |= _upsert_entry( + entries, + provider, + "claude_code", + { + "source": "claude_code", + "auth_type": "oauth", + "access_token": claude_creds.get("accessToken", ""), + "refresh_token": claude_creds.get("refreshToken"), + "expires_at_ms": claude_creds.get("expiresAt"), + "label": _label_from_token(claude_creds.get("accessToken", ""), "claude_code"), + }, + ) + + elif provider == "nous": + state = _load_provider_state(auth_store, "nous") + if state: + changed |= _upsert_entry( + entries, + provider, + "device_code", + { + "source": "device_code", + "auth_type": "oauth", + "access_token": state.get("access_token", ""), + "refresh_token": state.get("refresh_token"), + "expires_at": state.get("expires_at"), + "token_type": state.get("token_type"), + "scope": state.get("scope"), + "client_id": state.get("client_id"), + "portal_base_url": state.get("portal_base_url"), + "inference_base_url": state.get("inference_base_url"), + "agent_key": state.get("agent_key"), + "agent_key_expires_at": state.get("agent_key_expires_at"), + "label": _label_from_token(state.get("access_token", ""), "device_code"), + }, + ) + + elif provider == "openai-codex": + state = _load_provider_state(auth_store, "openai-codex") + tokens = 
state.get("tokens") if isinstance(state, dict) else None + if isinstance(tokens, dict) and tokens.get("access_token"): + changed |= _upsert_entry( + entries, + provider, + "device_code", + { + "source": "device_code", + "auth_type": "oauth", + "access_token": tokens.get("access_token", ""), + "refresh_token": tokens.get("refresh_token"), + "base_url": "https://chatgpt.com/backend-api/codex", + "last_refresh": state.get("last_refresh"), + "label": _label_from_token(tokens.get("access_token", ""), "device_code"), + }, + ) + + return changed + + +def load_pool(provider: str) -> CredentialPool: + provider = (provider or "").strip().lower() + raw_entries = read_credential_pool(provider) + entries = [PooledCredential.from_dict(provider, payload) for payload in raw_entries] + changed = _seed_from_singletons(provider, entries) + changed |= _seed_from_env(provider, entries) + if changed: + write_credential_pool( + provider, + [entry.to_dict() for entry in sorted(entries, key=lambda item: item.priority)], + ) + return CredentialPool(provider, entries) diff --git a/cli.py b/cli.py index af1077744c0..18ce982e59a 100644 --- a/cli.py +++ b/cli.py @@ -1761,6 +1761,7 @@ class HermesCLI: resolved_api_mode = runtime.get("api_mode", self.api_mode) resolved_acp_command = runtime.get("command") resolved_acp_args = list(runtime.get("args") or []) + resolved_credential_pool = runtime.get("credential_pool") if not isinstance(api_key, str) or not api_key: # Custom / local endpoints (llama.cpp, ollama, vLLM, etc.) often # don't require authentication. 
When a base_url IS configured but @@ -1793,6 +1794,7 @@ class HermesCLI: self.api_mode = resolved_api_mode self.acp_command = resolved_acp_command self.acp_args = resolved_acp_args + self._credential_pool = resolved_credential_pool self._provider_source = runtime.get("source") self.api_key = api_key self.base_url = base_url @@ -1894,6 +1896,7 @@ class HermesCLI: "api_mode": self.api_mode, "command": self.acp_command, "args": list(self.acp_args or []), + "credential_pool": getattr(self, "_credential_pool", None), } effective_model = model_override or self.model self.agent = AIAgent( @@ -1904,6 +1907,7 @@ class HermesCLI: api_mode=runtime.get("api_mode"), acp_command=runtime.get("command"), acp_args=runtime.get("args"), + credential_pool=runtime.get("credential_pool"), max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, verbose_logging=self.verbose, diff --git a/docs/plans/2026-03-23-multi-credential-oauth-fallback-design.md b/docs/plans/2026-03-23-multi-credential-oauth-fallback-design.md new file mode 100644 index 00000000000..9cfdab3ec1c --- /dev/null +++ b/docs/plans/2026-03-23-multi-credential-oauth-fallback-design.md @@ -0,0 +1,718 @@ +# Multi-Credential OAuth Fallback + +**Date:** 2026-03-23 +**Status:** Design v3 — implementation-ready + +## Problem + +Hermes supports one credential per provider. When it runs out of credits (402) or hits hard rate limits (429), the user is stuck. Users with multiple OAuth accounts (e.g., personal Claude Pro + work Claude Max + API key) can't leverage them. 
+ +## Design Decisions + +| Decision | Choice | Rationale | +|----------|--------|-----------| +| Registration UX | `hermes auth add PROVIDER` for both OAuth and API keys | Pool is the single authority — all credential types managed through one CLI | +| Rotation trigger | Rotate on 402 immediately; retry-then-rotate on 429 | Distinguishes transient throttle from hard credit cap | +| All exhausted | Fall through to existing cross-provider `_try_activate_fallback()` | Credential rotation = inner loop; cross-provider = outer loop | +| State persistence | Persist `last_status` + `last_status_at` to `auth.json`, 24h TTL | Avoids re-probing dead creds; TTL prevents stale-state bugs | +| Selection strategy | Fill-first (exhaust primary before advancing) | Matches "use primary until exhausted" goal | +| Pool entries | Provider-specific types, not generic + opaque bag | Each provider's refresh needs different state; typed entries make schema self-documenting | +| API key authority | Pool owns all keys — env vars seed pool on first run | One source of truth, no ambiguity between env/config.yaml/pool | +| Startup credential | `runtime_provider.py` consults pool | Pool is authoritative for initial credential, not env-var chain | +| Auxiliary clients | Independent — read pool `last_status` to skip dead creds only | Low-volume tasks; full pool wiring is disproportionate for v1 | + +--- + +## Data Model + +### Provider-Specific Pool Entries + +Stored in `~/.hermes/auth.json` under `credential_pool`. Each provider defines its own entry schema carrying exactly the fields its refresh logic needs. 
+ +#### Anthropic + +```json +{ + "credential_pool": { + "anthropic": [ + { + "id": "a1b2c3", + "label": "user@gmail.com", + "auth_type": "oauth", + "priority": 0, + "source": "claude_code", + "access_token": "sk-ant-oat-...", + "refresh_token": "rt-...", + "expires_at_ms": 1711234567000, + "last_status": "ok", + "last_status_at": null, + "last_error_code": null + }, + { + "id": "d4e5f6", + "label": "work@company.com", + "auth_type": "oauth", + "priority": 1, + "source": "hermes_pkce", + "access_token": "sk-ant-oat-...", + "refresh_token": "rt-...", + "expires_at_ms": 1711234999000, + "last_status": "exhausted", + "last_status_at": 1711230000.0, + "last_error_code": 402 + }, + { + "id": "g7h8i9", + "label": "work-budget", + "auth_type": "api_key", + "priority": 2, + "source": "manual", + "access_token": "sk-ant-api-...", + "refresh_token": null, + "expires_at_ms": null, + "last_status": "ok", + "last_status_at": null, + "last_error_code": null + } + ] + } +} +``` + +Refresh needs: `refresh_token` only. The Anthropic OAuth token exchange returns a new `access_token` + `refresh_token` + `expires_in`. No extra state. + +#### Nous + +```json +{ + "credential_pool": { + "nous": [ + { + "id": "n1o2u3", + "label": "user@nous.com", + "auth_type": "oauth", + "priority": 0, + "source": "device_code", + "access_token": "eyJ...", + "refresh_token": "rt-...", + "expires_at": "2026-03-24T12:00:00+00:00", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "client_id": "hermes-cli", + "portal_base_url": "https://portal.nousresearch.com", + "inference_base_url": "https://inference-api.nousresearch.com/v1", + "agent_key": "ak-...", + "agent_key_expires_at": "2026-03-23T13:30:00+00:00", + "last_status": "ok", + "last_status_at": null, + "last_error_code": null + } + ] + } +} +``` + +Refresh needs: `access_token`, `refresh_token`, `client_id`, `portal_base_url` for token refresh; then `access_token`, `portal_base_url`, `inference_base_url` for agent key minting. 
This is the full state currently in `auth.json → providers.nous`. + +#### Codex + +```json +{ + "credential_pool": { + "openai-codex": [ + { + "id": "c1d2x3", + "label": "user@openai.com", + "auth_type": "oauth", + "priority": 0, + "source": "device_code", + "access_token": "eyJ...", + "refresh_token": "rt-...", + "base_url": "https://chatgpt.com/backend-api/codex", + "last_refresh": "2026-03-23T10:00:00Z", + "last_status": "ok", + "last_status_at": null, + "last_error_code": null + } + ] + } +} +``` + +Refresh needs: `access_token`, `refresh_token`. Returns new tokens dict. `base_url` is carried per-entry because it can vary. + +#### API-Key Providers (generic) + +For providers that only use API keys (OpenRouter, Z.AI, Kimi, MiniMax, DeepSeek, etc.), entries are simpler: + +```json +{ + "credential_pool": { + "openrouter": [ + { + "id": "or1234", + "label": "personal", + "auth_type": "api_key", + "priority": 0, + "source": "env:OPENROUTER_API_KEY", + "access_token": "sk-or-...", + "refresh_token": null, + "base_url": "https://openrouter.ai/api/v1", + "last_status": "ok", + "last_status_at": null, + "last_error_code": null + } + ] + } +} +``` + +No refresh logic — API keys are static. Rotation still works on 402/429. + +### Common Fields (all entry types) + +| Field | Type | Description | +|-------|------|-------------| +| `id` | str | Unique ID (hex, assigned at registration) | +| `label` | str | Display name (auto-extracted JWT email or user-provided) | +| `auth_type` | str | `"oauth"` or `"api_key"` | +| `priority` | int | Lower = tried first (fill-first). Set at registration time. | +| `source` | str | Provenance: `claude_code`, `hermes_pkce`, `device_code`, `env:VAR_NAME`, `manual` | +| `access_token` | str | The token used for API calls | +| `refresh_token` | str? | OAuth refresh token (null for API keys) | +| `last_status` | str? | `"ok"`, `"exhausted"`, or null | +| `last_status_at` | float? 
| Unix timestamp of last status change | +| `last_error_code` | int? | HTTP status code that caused exhaustion | + +--- + +## Single-Authority API Key Storage + +### The Problem Today + +API keys currently come from three sources with no single owner: +1. **Env vars** — `ANTHROPIC_API_KEY`, `OPENROUTER_API_KEY`, etc. (resolved by `runtime_provider.py`) +2. **Config.yaml** — `model.api_key` for custom endpoints +3. **Future: `hermes auth add --type api-key`** — manual pool registration + +### The Solution: Pool Owns Everything + +The pool is the single source of truth for all credentials. Env vars and config.yaml become **seed sources** that populate the pool, not runtime resolution paths. + +**Seeding rules (on pool load):** + +1. For each provider in `PROVIDER_REGISTRY` with `api_key_env_vars`: + - Check each env var in priority order + - If set and no pool entry exists with `source: "env:VAR_NAME"` → create one at lowest priority + - If set and a pool entry with that source already exists → update `access_token` if changed (env var wins on conflict — user may have rotated the key) +2. For Anthropic specifically, also check: + - `~/.claude/.credentials.json` → seed as `source: "claude_code"` OAuth entry + - `~/.hermes/.anthropic_oauth.json` → seed as `source: "hermes_pkce"` OAuth entry +3. For Nous/Codex, also check: + - `auth.json → providers.nous` → seed as `source: "device_code"` OAuth entry + - `auth.json → providers.openai-codex` → seed as `source: "device_code"` OAuth entry + +**Key property:** seeding is additive and idempotent. Existing pool entries are never deleted by seeding. Manual entries (`source: "manual"`) are never touched. + +**After seeding, runtime_provider.py calls `pool.select()` instead of its own env-var chain.** The pool returns the first non-exhausted credential by priority. + +### What Happens to Env Vars + +Env vars still work — they seed the pool transparently. 
A user who sets `ANTHROPIC_API_KEY` and never runs `hermes auth add` gets exactly the same behavior as today: one credential, no rotation. The pool is invisible until they add a second credential. + +If a user later runs `hermes auth add anthropic --type api-key`, the new key gets priority after the env-var-seeded entry. They now have rotation. + +--- + +## Refresh Architecture + +### Pure Refresh Functions (New) + +Each OAuth provider gets a pure function that takes credential state in and returns updated state out, with **no file writes**: + +```python +# agent/anthropic_adapter.py +def refresh_anthropic_oauth_pure(refresh_token: str) -> Dict[str, Any]: + """Token exchange only. No file writes. + Returns: {"access_token": str, "refresh_token": str, "expires_at_ms": int} + """ + CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e" + data = urllib.parse.urlencode({ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": CLIENT_ID, + }).encode() + req = urllib.request.Request( + "https://console.anthropic.com/v1/oauth/token", + data=data, + headers={ + "Content-Type": "application/x-www-form-urlencoded", + "User-Agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)", + }, + method="POST", + ) + with urllib.request.urlopen(req, timeout=10) as resp: + result = json.loads(resp.read().decode()) + return { + "access_token": result["access_token"], + "refresh_token": result.get("refresh_token", refresh_token), + "expires_at_ms": int(time.time() * 1000) + (result.get("expires_in", 3600) * 1000), + } +``` + +```python +# hermes_cli/auth.py +def refresh_nous_oauth_pure( + access_token: str, + refresh_token: str, + client_id: str, + portal_base_url: str, + inference_base_url: str, + *, + min_key_ttl_seconds: int = 1800, + timeout_seconds: float = 15.0, +) -> Dict[str, Any]: + """Refresh Nous access token + mint agent key. No auth.json writes. + Returns updated state dict with all Nous-specific fields. 
+ """ + # Step 1: refresh access_token if expiring (same HTTP call as _refresh_access_token) + # Step 2: mint agent key (same HTTP call as _mint_agent_key) + # Returns: {"access_token", "refresh_token", "expires_at", "agent_key", + # "agent_key_expires_at", "inference_base_url", ...} + ... + +def refresh_codex_oauth_pure( + access_token: str, + refresh_token: str, + *, + timeout_seconds: float = 20.0, +) -> Dict[str, Any]: + """Refresh Codex OAuth tokens. No auth.json writes. + Returns: {"access_token": str, "refresh_token": str} + """ + # Same HTTP call as _refresh_codex_auth_tokens + ... +``` + +### Existing Functions Refactored (Backward Compat) + +The existing singleton functions call the new pure functions + write to their singleton files. No behavior change for code that doesn't use the pool. + +```python +# BEFORE: +def _refresh_oauth_token(creds): + # ... HTTP call ... + _write_claude_code_credentials(new_access, new_refresh, new_expires_ms) + return new_access + +# AFTER: +def _refresh_oauth_token(creds): + result = refresh_anthropic_oauth_pure(creds["refreshToken"]) + _write_claude_code_credentials( + result["access_token"], result["refresh_token"], result["expires_at_ms"] + ) + return result["access_token"] +``` + +### Pool Refresh Flow + +``` +pool.try_refresh(entry) → updated_entry | None: + 1. Dispatch to provider-specific pure refresh: + - anthropic → refresh_anthropic_oauth_pure(entry.refresh_token) + - nous → refresh_nous_oauth_pure(entry.access_token, entry.refresh_token, ...) + - codex → refresh_codex_oauth_pure(entry.access_token, entry.refresh_token) + 2. On success: + - Update entry fields in-memory (access_token, refresh_token, expires_at, etc.) + - Persist updated pool entry to auth.json (pool's section, not singleton files) + - Return updated entry + 3. 
On failure: + - Mark entry exhausted (last_status="exhausted", last_status_at=now) + - Persist status to auth.json + - Return None +``` + +**Key guarantee:** refreshing entry B never touches entry A. Each entry carries its own state, and the pure refresh functions have no side effects. + +--- + +## Startup Wiring + +### runtime_provider.py Changes + +`resolve_runtime_provider()` currently resolves credentials via provider-specific chains (env vars, auth.json singletons, file reads). After this change: + +```python +def resolve_runtime_provider(*, requested=None, explicit_api_key=None, explicit_base_url=None): + # ... existing provider resolution (which provider to use) stays the same ... + + provider = resolve_provider(requested, ...) + + # NEW: consult pool for initial credential + from agent.credential_pool import load_pool + pool = load_pool(provider) + + if pool and pool.has_credentials(): + entry = pool.select() + if entry: + return { + "provider": provider, + "api_mode": _api_mode_for_provider(provider, entry), + "base_url": _base_url_for_entry(provider, entry), + "api_key": entry.access_token, + "source": entry.source, + "credential_pool": pool, # pass pool to AIAgent for rotation + # ... provider-specific fields from entry ... + } + + # FALLBACK: no pool or pool empty — use existing resolution + # (this path handles first-time users who haven't run setup yet) + if provider == "nous": + creds = resolve_nous_runtime_credentials(...) + ... + elif provider == "anthropic": + ... +``` + +The pool is passed to `AIAgent` via the runtime dict so the agent can rotate credentials mid-conversation without re-resolving. + +### AIAgent.__init__ Changes + +```python +class AIAgent: + def __init__(self, ..., credential_pool=None): + self._credential_pool = credential_pool + # ... existing init ... +``` + +### Gateway Startup + +Gateway creates `AIAgent` instances per session. 
Since `resolve_runtime_provider()` now returns the pool, gateway gets rotation for free: + +```python +# gateway/run.py — existing code already calls resolve_runtime_provider() +runtime = resolve_runtime_provider(requested=config.get("provider")) +agent = AIAgent(..., credential_pool=runtime.get("credential_pool")) +``` + +No additional gateway changes needed. + +--- + +## Runtime Flow + +### Credential Selection (fill-first) + +``` +pool.select(): + 1. For each entry by priority (ascending): + a. If last_status == "exhausted" and now - last_status_at < 86400 → skip + b. If last_status == "exhausted" and now - last_status_at >= 86400 → reset to "ok" + c. If auth_type == "oauth" and token expires within 120s: + - try_refresh(entry) + - If refresh fails → mark exhausted, continue to next + d. Return this entry + 2. All skipped/exhausted → return None +``` + +### Error Handling in run_agent.py + +Replaces the three provider-specific `if/elif` blocks (~lines 6104-6147): + +```python +# In the except block, after status_code is extracted: + +# Credential pool rotation (replaces 3 provider-specific refresh blocks) +if self._credential_pool: + if status_code == 402: + prev = self._credential_pool.current() + next_entry = self._credential_pool.mark_exhausted_and_rotate( + status_code=402) + if next_entry: + self._swap_credential(next_entry) + print(f"{self.log_prefix}🔐 {prev.label} exhausted (402), " + f"switching to {next_entry.label}") + retry_count = 0 + continue + # All exhausted — fall through to cross-provider fallback below + + elif status_code == 429 and retry_429_with_same_cred: + # Second 429 on same credential for this request + prev = self._credential_pool.current() + next_entry = self._credential_pool.mark_exhausted_and_rotate( + status_code=429) + if next_entry: + self._swap_credential(next_entry) + print(f"{self.log_prefix}🔐 {prev.label} rate-limited (429), " + f"switching to {next_entry.label}") + retry_count = 0 + continue + + elif status_code == 429 and 
not retry_429_with_same_cred: + retry_429_with_same_cred = True + # Fall through to existing backoff logic (retry same credential) + + elif status_code == 401: + refreshed = self._credential_pool.try_refresh_current() + if refreshed: + self._swap_credential(refreshed) + print(f"{self.log_prefix}🔐 Credentials refreshed, retrying...") + continue + # Refresh failed — show existing diagnostic output +``` + +### _swap_credential (replaces 3 methods) + +```python +def _swap_credential(self, entry): + """Hot-swap the active credential. Dispatches by api_mode.""" + if self.api_mode == "anthropic_messages": + from agent.anthropic_adapter import build_anthropic_client, _is_oauth_token + try: + self._anthropic_client.close() + except Exception: + pass + self._anthropic_api_key = entry.access_token + self._anthropic_client = build_anthropic_client( + entry.access_token, self._anthropic_base_url) + self._is_anthropic_oauth = _is_oauth_token(entry.access_token) + + elif self.api_mode == "codex_responses": + self.api_key = entry.access_token + self.base_url = getattr(entry, "base_url", self.base_url) + self._client_kwargs["api_key"] = self.api_key + self._client_kwargs["base_url"] = self.base_url + self._replace_primary_openai_client(reason="credential_rotation") + + elif self.api_mode == "chat_completions": + self.api_key = entry.access_token + base = getattr(entry, "inference_base_url", None) or self.base_url + self.base_url = base + self._client_kwargs["api_key"] = self.api_key + self._client_kwargs["base_url"] = self.base_url + self._client_kwargs.pop("default_headers", None) + self._replace_primary_openai_client(reason="credential_rotation") +``` + +### Deleted Methods + +- `_try_refresh_codex_client_credentials()` (~30 lines) +- `_try_refresh_nous_client_credentials()` (~35 lines) +- `_try_refresh_anthropic_client_credentials()` (~40 lines) + +Total: ~105 lines removed, replaced by `_swap_credential()` (~30 lines) + pool delegation. 
+ +--- + +## CLI Commands + +### `hermes auth add <provider>` + +``` +$ hermes auth add anthropic + + How would you like to authenticate? + 1. Claude Pro/Max subscription (OAuth login) + 2. API key + + > 1 + Running OAuth flow... + [browser opens, user logs in with different account] + + ✓ Authenticated as work@company.com + ✓ Added as anthropic credential #2 (priority 1) +``` + +``` +$ hermes auth add anthropic --type api-key + Paste your API key: sk-ant-api-*** + Label (optional, default: api-key-1): work-budget + + ✓ Added as anthropic credential #3: "work-budget" (priority 2) +``` + +Implementation: reuses existing OAuth flows from `setup.py` (`run_oauth_setup_token()`, device code flow, etc.) — the pool just stores the result instead of the singleton file. + +### `hermes auth list` + +``` +$ hermes auth list + + anthropic (3 credentials): + #1 user@gmail.com oauth claude_code ← active + #2 work@company.com oauth hermes_pkce exhausted (402, 2h ago) + #3 work-budget api_key manual + + nous (1 credential): + #1 user@nous.com oauth device_code ← active + + openrouter (1 credential): + #1 personal api_key env:OPENROUTER_API_KEY ← active +``` + +### `hermes auth remove <provider> <index>` + +``` +$ hermes auth remove anthropic 2 + ✓ Removed anthropic credential #2 (work@company.com) + Remaining credentials re-prioritized. +``` + +### `hermes auth reset <provider>` + +Clears `last_status` on all credentials for a provider (manual recovery): + +``` +$ hermes auth reset anthropic + ✓ Reset status on 3 anthropic credentials +``` + +--- + +## Backward Compatibility + +### Auto-Migration (First Pool Load) + +When `credential_pool` is absent in `auth.json`, `load_pool()` runs migration: + +1. **Anthropic:** Walk the existing `resolve_anthropic_token()` priority chain. 
For each source that has a credential, create a pool entry: + - `ANTHROPIC_TOKEN` env → entry with `source: "env:ANTHROPIC_TOKEN"` + - `~/.hermes/.anthropic_oauth.json` → entry with `source: "hermes_pkce"` + - `~/.claude/.credentials.json` → entry with `source: "claude_code"` + - `ANTHROPIC_API_KEY` env → entry with `source: "env:ANTHROPIC_API_KEY"` + - Priority follows the existing resolution order (first found = priority 0) + +2. **Nous:** Copy `auth.json → providers.nous` state into a pool entry with `source: "device_code"`. + +3. **Codex:** Copy `auth.json → providers.openai-codex` state into a pool entry with `source: "device_code"`. + +4. **API-key providers:** For each provider in `PROVIDER_REGISTRY` with `api_key_env_vars`, check env vars and create entries. + +**Migration is additive.** Original singleton state is preserved (existing code paths still work). The pool is written alongside, and `runtime_provider.py` prefers it when present. + +### Env Var Re-Seeding (Every Pool Load) + +On every `load_pool()`, env vars are re-checked: +- If env var value changed since last seed → update the pool entry's `access_token` +- If env var is newly set → create entry at lowest priority +- If env var is now empty but pool entry with that source exists → keep pool entry (user may have moved the key to pool-only) + +This ensures `export ANTHROPIC_API_KEY=new-key` takes effect without `hermes auth add`. + +--- + +## Auxiliary Clients + +`auxiliary_client.py` does **not** use the pool for rotation. It continues resolving credentials via its existing paths (`_read_nous_auth()`, `_read_codex_access_token()`, `_try_anthropic()`, etc.). + +**One addition:** before resolving, check if the pool has a `last_status: "exhausted"` (within 24h) for the entry that would be resolved. 
If so, skip to the next available credential in the pool: + +```python +# In _try_anthropic() or resolve_provider_client(): +from agent.credential_pool import load_pool +pool = load_pool("anthropic") +if pool: + entry = pool.select() # skips exhausted entries + if entry: + return build_anthropic_client(entry.access_token, ...), model +# Fall through to existing resolution +``` + +This is ~15 lines per provider in `auxiliary_client.py`. It prevents auxiliary tasks from wasting a round-trip on a known-dead credential without requiring full pool integration. + +--- + +## File Changes + +### New Files + +| File | Est. Lines | Purpose | +|------|-----------|---------| +| `agent/credential_pool.py` | ~350 | `CredentialPool` class, `load_pool()`, provider-specific entry parsing, fill-first selection, mark/rotate, persist, migration, env-var seeding, JWT label extraction | +| `hermes_cli/auth_commands.py` | ~150 | `auth add`, `auth list`, `auth remove`, `auth reset` CLI commands | + +### Modified Files + +| File | Change | Est. Delta | +|------|--------|-----------| +| `agent/anthropic_adapter.py` | Extract `refresh_anthropic_oauth_pure()`. Refactor `_refresh_oauth_token()` + `refresh_hermes_oauth_token()` to call it. | +40, ~15 refactored | +| `hermes_cli/auth.py` | Extract `refresh_nous_oauth_pure()`, `refresh_codex_oauth_pure()`. Add `read_credential_pool()` / `write_credential_pool()` with file-lock integration. | +100, ~25 refactored | +| `hermes_cli/runtime_provider.py` | `resolve_runtime_provider()` consults pool before falling back to existing chains. Passes pool in return dict. | +30 | +| `run_agent.py` | Accept `credential_pool` in init. Replace 3 `_try_refresh_*` methods + 3 error blocks with pool rotation + `_swap_credential()`. | -105, +60 | +| `hermes_cli/main.py` | Register `auth add/list/remove/reset` subcommands. 
| +10 | +| `agent/auxiliary_client.py` | Check pool `last_status` before resolving credentials in `_try_anthropic()`, `_read_nous_auth()`, `_read_codex_access_token()`. | +20 | + +### Not Touched + +- `gateway/run.py` — gets pool for free via `resolve_runtime_provider()` → `AIAgent` +- `config.yaml` — no new config keys +- `hermes_cli/setup.py` — existing OAuth flows reused by `hermes auth add` + +### Total + +~500 new lines, ~145 removed/refactored, 8 files touched. + +--- + +## Test Plan + +### Unit Tests — credential_pool.py + +| # | Test | Verifies | +|---|------|----------| +| 1 | Fill-first selection returns lowest-priority non-exhausted entry | Selection strategy | +| 2 | All entries exhausted → returns None | Exhaustion boundary | +| 3 | 24h TTL: exhausted entry with old timestamp resets to "ok" | TTL expiry | +| 4 | 24h TTL: exhausted entry within window stays exhausted | TTL enforcement | +| 5 | `mark_exhausted_and_rotate()` sets status + persists + returns next | Rotation + persistence | +| 6 | `try_refresh()` success: updates token fields in-memory + disk | Refresh happy path | +| 7 | `try_refresh()` failure: marks exhausted, returns None | Refresh failure | +| 8 | JWT label extraction: valid JWT → email | Auto-labeling | +| 9 | JWT label extraction: non-JWT → None | Graceful fallback | +| 10 | Env-var seeding: creates entry, deduplicates on reload | Seeding idempotency | +| 11 | Env-var seeding: updated env var updates pool entry token | Env var rotation | +| 12 | Migration: Anthropic sources → pool entries with correct priority | Backward compat | +| 13 | Migration: Nous state → pool entry with full provider fields | Provider-specific migration | +| 14 | Migration: Codex state → pool entry | Provider-specific migration | +| 15 | Migration: idempotent (running twice doesn't duplicate) | Safety | + +### Integration Tests — refresh isolation + +| # | Test | Verifies | +|---|------|----------| +| 16 | Refresh Anthropic cred B does NOT overwrite cred A's 
token | No singleton clobbering | +| 17 | Refresh Nous cred B does NOT overwrite cred A's agent key | No singleton clobbering | +| 18 | Pool persists refresh result to `credential_pool` section only | Isolation from singleton files | +| 19 | Existing singleton refresh functions still work (backward compat) | Refactor didn't break old path | + +### Integration Tests — runtime wiring + +| # | Test | Verifies | +|---|------|----------| +| 20 | `resolve_runtime_provider()` returns pool credential when pool exists | Startup wire-up | +| 21 | `resolve_runtime_provider()` falls back to old chain when pool empty | Backward compat | +| 22 | Pool passed through to AIAgent via runtime dict | Rotation availability | +| 23 | Gateway creates agent with pool from `resolve_runtime_provider()` | Gateway gets rotation | + +### Integration Tests — rotation flow + +| # | Test | Verifies | +|---|------|----------| +| 24 | 402 on cred 1 → auto-rotate to cred 2 → success | Happy path rotation | +| 25 | 429 (first) → retry same cred → success | Transient throttle | +| 26 | 429 (first) → retry same cred → 429 again → rotate | Hard rate limit | +| 27 | All creds exhausted → `_try_activate_fallback()` | Cross-provider fallback | +| 28 | 401 → `try_refresh_current()` → swap → success | Auth refresh | +| 29 | Cross-session: exhaust in session 1, session 2 skips it | Persisted status | +| 30 | Cross-session: 24h later, session re-probes | TTL expiry | +| 31 | Auxiliary client skips known-dead credential | Auxiliary awareness | + +### CLI Tests + +| # | Test | Verifies | +|---|------|----------| +| 32 | `hermes auth add anthropic` (OAuth mock) → pool entry with JWT label | Registration | +| 33 | `hermes auth add anthropic --type api-key` → pool entry with manual label | API key registration | +| 34 | `hermes auth list` output format matches spec | Display | +| 35 | `hermes auth remove` removes correct entry, re-indexes priorities | Removal | +| 36 | `hermes auth reset` clears all `last_status` 
for provider | Manual recovery | diff --git a/gateway/run.py b/gateway/run.py index 7876565b402..8043d7f7007 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -250,6 +250,7 @@ def _resolve_runtime_agent_kwargs() -> dict: "api_mode": runtime.get("api_mode"), "command": runtime.get("command"), "args": list(runtime.get("args") or []), + "credential_pool": runtime.get("credential_pool"), } diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index b2211287d1b..85f48f892f7 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -537,7 +537,11 @@ def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]: except Exception: return {"version": AUTH_STORE_VERSION, "providers": {}} - if isinstance(raw, dict) and isinstance(raw.get("providers"), dict): + if isinstance(raw, dict) and ( + isinstance(raw.get("providers"), dict) + or isinstance(raw.get("credential_pool"), dict) + ): + raw.setdefault("providers", {}) return raw # Migrate from PR's "systems" format if present @@ -605,6 +609,30 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di auth_store["active_provider"] = provider_id +def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]: + """Return the persisted credential pool, or one provider slice.""" + auth_store = _load_auth_store() + pool = auth_store.get("credential_pool") + if not isinstance(pool, dict): + pool = {} + if provider_id is None: + return dict(pool) + provider_entries = pool.get(provider_id) + return list(provider_entries) if isinstance(provider_entries, list) else [] + + +def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path: + """Persist one provider's credential pool under auth.json.""" + with _auth_store_lock(): + auth_store = _load_auth_store() + pool = auth_store.get("credential_pool") + if not isinstance(pool, dict): + pool = {} + auth_store["credential_pool"] = pool + pool[provider_id] = list(entries) + return _save_auth_store(auth_store) + + 
def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]: """Return persisted auth state for a provider, or None.""" auth_store = _load_auth_store() @@ -878,15 +906,14 @@ def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None _save_auth_store(auth_store) -def _refresh_codex_auth_tokens( - tokens: Dict[str, str], - timeout_seconds: float, -) -> Dict[str, str]: - """Refresh Codex access token using the refresh token. - - Saves the new tokens to Hermes auth store automatically. - """ - refresh_token = tokens.get("refresh_token") +def refresh_codex_oauth_pure( + access_token: str, + refresh_token: str, + *, + timeout_seconds: float = 20.0, +) -> Dict[str, Any]: + """Refresh Codex OAuth tokens without mutating Hermes auth state.""" + del access_token # Access token is only used by callers to decide whether to refresh. if not isinstance(refresh_token, str) or not refresh_token.strip(): raise AuthError( "Codex auth is missing refresh_token. Run `hermes login` to re-authenticate.", @@ -941,8 +968,8 @@ def _refresh_codex_auth_tokens( relogin_required=True, ) from exc - access_token = refresh_payload.get("access_token") - if not isinstance(access_token, str) or not access_token.strip(): + refreshed_access = refresh_payload.get("access_token") + if not isinstance(refreshed_access, str) or not refreshed_access.strip(): raise AuthError( "Codex token refresh response was missing access_token.", provider="openai-codex", @@ -950,11 +977,33 @@ def _refresh_codex_auth_tokens( relogin_required=True, ) - updated_tokens = dict(tokens) - updated_tokens["access_token"] = access_token.strip() + updated = { + "access_token": refreshed_access.strip(), + "refresh_token": refresh_token.strip(), + "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + } next_refresh = refresh_payload.get("refresh_token") if isinstance(next_refresh, str) and next_refresh.strip(): - updated_tokens["refresh_token"] = next_refresh.strip() + 
updated["refresh_token"] = next_refresh.strip() + return updated + + +def _refresh_codex_auth_tokens( + tokens: Dict[str, str], + timeout_seconds: float, +) -> Dict[str, str]: + """Refresh Codex access token using the refresh token. + + Saves the new tokens to Hermes auth store automatically. + """ + refreshed = refresh_codex_oauth_pure( + str(tokens.get("access_token", "") or ""), + str(tokens.get("refresh_token", "") or ""), + timeout_seconds=timeout_seconds, + ) + updated_tokens = dict(tokens) + updated_tokens["access_token"] = refreshed["access_token"] + updated_tokens["refresh_token"] = refreshed["refresh_token"] _save_codex_tokens(updated_tokens) return updated_tokens @@ -1293,6 +1342,91 @@ def _agent_key_is_usable(state: Dict[str, Any], min_ttl_seconds: int) -> bool: return not _is_expiring(state.get("agent_key_expires_at"), min_ttl_seconds) +def refresh_nous_oauth_pure( + access_token: str, + refresh_token: str, + client_id: str, + portal_base_url: str, + inference_base_url: str, + *, + token_type: str = "Bearer", + scope: str = DEFAULT_NOUS_SCOPE, + obtained_at: Optional[str] = None, + expires_at: Optional[str] = None, + agent_key: Optional[str] = None, + agent_key_expires_at: Optional[str] = None, + min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, + timeout_seconds: float = 15.0, + insecure: Optional[bool] = None, + ca_bundle: Optional[str] = None, + force_refresh: bool = False, + force_mint: bool = False, +) -> Dict[str, Any]: + """Refresh Nous OAuth state without mutating auth.json.""" + state: Dict[str, Any] = { + "access_token": access_token, + "refresh_token": refresh_token, + "client_id": client_id or DEFAULT_NOUS_CLIENT_ID, + "portal_base_url": (portal_base_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/"), + "inference_base_url": (inference_base_url or DEFAULT_NOUS_INFERENCE_URL).rstrip("/"), + "token_type": token_type or "Bearer", + "scope": scope or DEFAULT_NOUS_SCOPE, + "obtained_at": obtained_at, + "expires_at": expires_at, + 
"agent_key": agent_key, + "agent_key_expires_at": agent_key_expires_at, + "tls": { + "insecure": bool(insecure), + "ca_bundle": ca_bundle, + }, + } + verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state) + timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0) + + with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client: + if force_refresh or _is_expiring(state.get("expires_at"), ACCESS_TOKEN_REFRESH_SKEW_SECONDS): + refreshed = _refresh_access_token( + client=client, + portal_base_url=state["portal_base_url"], + client_id=state["client_id"], + refresh_token=state["refresh_token"], + ) + now = datetime.now(timezone.utc) + access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in")) + state["access_token"] = refreshed["access_token"] + state["refresh_token"] = refreshed.get("refresh_token") or state["refresh_token"] + state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer" + state["scope"] = refreshed.get("scope") or state.get("scope") + refreshed_url = _optional_base_url(refreshed.get("inference_base_url")) + if refreshed_url: + state["inference_base_url"] = refreshed_url + state["obtained_at"] = now.isoformat() + state["expires_in"] = access_ttl + state["expires_at"] = datetime.fromtimestamp( + now.timestamp() + access_ttl, tz=timezone.utc + ).isoformat() + + if force_mint or not _agent_key_is_usable(state, max(60, int(min_key_ttl_seconds))): + mint_payload = _mint_agent_key( + client=client, + portal_base_url=state["portal_base_url"], + access_token=state["access_token"], + min_ttl_seconds=min_key_ttl_seconds, + ) + now = datetime.now(timezone.utc) + state["agent_key"] = mint_payload.get("api_key") + state["agent_key_id"] = mint_payload.get("key_id") + state["agent_key_expires_at"] = mint_payload.get("expires_at") + state["agent_key_expires_in"] = mint_payload.get("expires_in") + state["agent_key_reused"] = bool(mint_payload.get("reused", 
False)) + state["agent_key_obtained_at"] = now.isoformat() + minted_url = _optional_base_url(mint_payload.get("inference_base_url")) + if minted_url: + state["inference_base_url"] = minted_url + + return state + + def resolve_nous_runtime_credentials( *, min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, @@ -2159,34 +2293,36 @@ def _codex_device_code_login() -> Dict[str, Any]: } -def _login_nous(args, pconfig: ProviderConfig) -> None: - """Nous Portal device authorization flow.""" +def _nous_device_code_login( + *, + portal_base_url: Optional[str] = None, + inference_base_url: Optional[str] = None, + client_id: Optional[str] = None, + scope: Optional[str] = None, + open_browser: bool = True, + timeout_seconds: float = 15.0, + insecure: bool = False, + ca_bundle: Optional[str] = None, + min_key_ttl_seconds: int = 5 * 60, +) -> Dict[str, Any]: + """Run the Nous device-code flow and return full OAuth state without persisting.""" + pconfig = PROVIDER_REGISTRY["nous"] portal_base_url = ( - getattr(args, "portal_url", None) + portal_base_url or os.getenv("HERMES_PORTAL_BASE_URL") or os.getenv("NOUS_PORTAL_BASE_URL") or pconfig.portal_base_url ).rstrip("/") requested_inference_url = ( - getattr(args, "inference_url", None) + inference_base_url or os.getenv("NOUS_INFERENCE_BASE_URL") or pconfig.inference_base_url ).rstrip("/") - client_id = getattr(args, "client_id", None) or pconfig.client_id - scope = getattr(args, "scope", None) or pconfig.scope - open_browser = not getattr(args, "no_browser", False) - timeout_seconds = getattr(args, "timeout", None) or 15.0 + client_id = client_id or pconfig.client_id + scope = scope or pconfig.scope timeout = httpx.Timeout(timeout_seconds) - - insecure = bool(getattr(args, "insecure", False)) - ca_bundle = ( - getattr(args, "ca_bundle", None) - or os.getenv("HERMES_CA_BUNDLE") - or os.getenv("SSL_CERT_FILE") - ) verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True) - # Skip browser open in SSH 
sessions if _is_remote_session(): open_browser = False @@ -2197,74 +2333,121 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: elif ca_bundle: print(f"TLS verification: custom CA bundle ({ca_bundle})") - try: - with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client: - device_data = _request_device_code( - client=client, portal_base_url=portal_base_url, - client_id=client_id, scope=scope, - ) - - verification_url = str(device_data["verification_uri_complete"]) - user_code = str(device_data["user_code"]) - expires_in = int(device_data["expires_in"]) - interval = int(device_data["interval"]) - - print() - print("To continue:") - print(f" 1. Open: {verification_url}") - print(f" 2. If prompted, enter code: {user_code}") - - if open_browser: - opened = webbrowser.open(verification_url) - if opened: - print(" (Opened browser for verification)") - else: - print(" Could not open browser automatically — use the URL above.") - - effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS)) - print(f"Waiting for approval (polling every {effective_interval}s)...") - - token_data = _poll_for_token( - client=client, portal_base_url=portal_base_url, - client_id=client_id, device_code=str(device_data["device_code"]), - expires_in=expires_in, poll_interval=interval, - ) - - # Process token response - now = datetime.now(timezone.utc) - token_expires_in = _coerce_ttl_seconds(token_data.get("expires_in", 0)) - expires_at = now.timestamp() + token_expires_in - inference_base_url = ( - _optional_base_url(token_data.get("inference_base_url")) - or requested_inference_url + with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client: + device_data = _request_device_code( + client=client, + portal_base_url=portal_base_url, + client_id=client_id, + scope=scope, ) - if inference_base_url != requested_inference_url: - print(f"Using portal-provided inference URL: 
{inference_base_url}") - auth_state = { - "portal_base_url": portal_base_url, - "inference_base_url": inference_base_url, - "client_id": client_id, - "scope": token_data.get("scope") or scope, - "token_type": token_data.get("token_type", "Bearer"), - "access_token": token_data["access_token"], - "refresh_token": token_data.get("refresh_token"), - "obtained_at": now.isoformat(), - "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(), - "expires_in": token_expires_in, - "tls": { - "insecure": verify is False, - "ca_bundle": verify if isinstance(verify, str) else None, - }, - "agent_key": None, - "agent_key_id": None, - "agent_key_expires_at": None, - "agent_key_expires_in": None, - "agent_key_reused": None, - "agent_key_obtained_at": None, - } + verification_url = str(device_data["verification_uri_complete"]) + user_code = str(device_data["user_code"]) + expires_in = int(device_data["expires_in"]) + interval = int(device_data["interval"]) + + print() + print("To continue:") + print(f" 1. Open: {verification_url}") + print(f" 2. 
If prompted, enter code: {user_code}") + + if open_browser: + opened = webbrowser.open(verification_url) + if opened: + print(" (Opened browser for verification)") + else: + print(" Could not open browser automatically — use the URL above.") + + effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS)) + print(f"Waiting for approval (polling every {effective_interval}s)...") + + token_data = _poll_for_token( + client=client, + portal_base_url=portal_base_url, + client_id=client_id, + device_code=str(device_data["device_code"]), + expires_in=expires_in, + poll_interval=interval, + ) + + now = datetime.now(timezone.utc) + token_expires_in = _coerce_ttl_seconds(token_data.get("expires_in", 0)) + expires_at = now.timestamp() + token_expires_in + resolved_inference_url = ( + _optional_base_url(token_data.get("inference_base_url")) + or requested_inference_url + ) + if resolved_inference_url != requested_inference_url: + print(f"Using portal-provided inference URL: {resolved_inference_url}") + + auth_state = { + "portal_base_url": portal_base_url, + "inference_base_url": resolved_inference_url, + "client_id": client_id, + "scope": token_data.get("scope") or scope, + "token_type": token_data.get("token_type", "Bearer"), + "access_token": token_data["access_token"], + "refresh_token": token_data.get("refresh_token"), + "obtained_at": now.isoformat(), + "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(), + "expires_in": token_expires_in, + "tls": { + "insecure": verify is False, + "ca_bundle": verify if isinstance(verify, str) else None, + }, + "agent_key": None, + "agent_key_id": None, + "agent_key_expires_at": None, + "agent_key_expires_in": None, + "agent_key_reused": None, + "agent_key_obtained_at": None, + } + return refresh_nous_oauth_pure( + auth_state["access_token"], + auth_state["refresh_token"], + auth_state["client_id"], + auth_state["portal_base_url"], + auth_state["inference_base_url"], + 
token_type=auth_state["token_type"], + scope=auth_state["scope"], + obtained_at=auth_state["obtained_at"], + expires_at=auth_state["expires_at"], + agent_key=auth_state["agent_key"], + agent_key_expires_at=auth_state["agent_key_expires_at"], + min_key_ttl_seconds=min_key_ttl_seconds, + timeout_seconds=timeout_seconds, + insecure=insecure, + ca_bundle=ca_bundle, + force_refresh=False, + force_mint=True, + ) + + +def _login_nous(args, pconfig: ProviderConfig) -> None: + """Nous Portal device authorization flow.""" + timeout_seconds = getattr(args, "timeout", None) or 15.0 + insecure = bool(getattr(args, "insecure", False)) + ca_bundle = ( + getattr(args, "ca_bundle", None) + or os.getenv("HERMES_CA_BUNDLE") + or os.getenv("SSL_CERT_FILE") + ) + + try: + auth_state = _nous_device_code_login( + portal_base_url=getattr(args, "portal_url", None) or pconfig.portal_base_url, + inference_base_url=getattr(args, "inference_url", None) or pconfig.inference_base_url, + client_id=getattr(args, "client_id", None) or pconfig.client_id, + scope=getattr(args, "scope", None) or pconfig.scope, + open_browser=not getattr(args, "no_browser", False), + timeout_seconds=timeout_seconds, + insecure=insecure, + ca_bundle=ca_bundle, + min_key_ttl_seconds=5 * 60, + ) + inference_base_url = auth_state["inference_base_url"] + verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True) - # Save auth state with _auth_store_lock(): auth_store = _load_auth_store() _save_provider_state(auth_store, "nous", auth_state) @@ -2276,21 +2459,17 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: print(f" Auth state: {saved_to}") print(f" Config updated: {config_path} (model.provider=nous)") - # Mint an initial agent key and list available models try: - runtime_creds = resolve_nous_runtime_credentials( - min_key_ttl_seconds=5 * 60, - timeout_seconds=timeout_seconds, - insecure=insecure, ca_bundle=ca_bundle, - ) - runtime_key = runtime_creds.get("api_key") - runtime_base_url = 
runtime_creds.get("base_url") or inference_base_url + runtime_key = auth_state.get("agent_key") or auth_state.get("access_token") if not isinstance(runtime_key, str) or not runtime_key: - raise AuthError("No runtime API key available to fetch models", - provider="nous", code="invalid_token") + raise AuthError( + "No runtime API key available to fetch models", + provider="nous", + code="invalid_token", + ) model_ids = fetch_nous_models( - inference_base_url=runtime_base_url, + inference_base_url=inference_base_url, api_key=runtime_key, timeout_seconds=timeout_seconds, verify=verify, diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py new file mode 100644 index 00000000000..ba273305b75 --- /dev/null +++ b/hermes_cli/auth_commands.py @@ -0,0 +1,238 @@ +"""Credential-pool auth subcommands.""" + +from __future__ import annotations + +from getpass import getpass +import uuid + +from agent.credential_pool import PooledCredential, load_pool +import hermes_cli.auth as auth_mod +from hermes_cli.auth import PROVIDER_REGISTRY + + +def _normalize_provider(provider: str) -> str: + normalized = (provider or "").strip().lower() + if normalized in {"or", "open-router"}: + return "openrouter" + return normalized + + +def _provider_base_url(provider: str) -> str: + if provider == "openrouter": + return "https://openrouter.ai/api/v1" + pconfig = PROVIDER_REGISTRY.get(provider) + return pconfig.inference_base_url if pconfig else "" + + +def _derive_label(token: str, fallback: str) -> str: + claims = auth_mod._decode_jwt_claims(token) + for key in ("email", "preferred_username", "upn"): + value = claims.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + return fallback + + +def _oauth_default_label(provider: str, count: int) -> str: + return f"{provider}-oauth-{count}" + + +def _api_key_default_label(count: int) -> str: + return f"api-key-{count}" + + +def _display_source(source: str) -> str: + return source.split(":", 1)[1] if 
source.startswith("manual:") else source + + +def auth_add_command(args) -> None: + provider = _normalize_provider(getattr(args, "provider", "")) + if provider not in PROVIDER_REGISTRY and provider != "openrouter": + raise SystemExit(f"Unknown provider: {provider}") + + requested_type = str(getattr(args, "auth_type", "") or "").strip().lower() + if requested_type in {"api_key", "api-key"}: + requested_type = "api_key" + if not requested_type: + requested_type = "oauth" if provider in {"anthropic", "nous", "openai-codex"} else "api_key" + + pool = load_pool(provider) + + if requested_type == "api_key": + token = (getattr(args, "api_key", None) or "").strip() + if not token: + token = getpass("Paste your API key: ").strip() + if not token: + raise SystemExit("No API key provided.") + default_label = _api_key_default_label(len(pool.entries()) + 1) + label = (getattr(args, "label", None) or "").strip() + if not label: + label = input(f"Label (optional, default: {default_label}): ").strip() or default_label + entry = PooledCredential( + provider=provider, + id=uuid.uuid4().hex[:6], + label=label, + auth_type="api_key", + priority=0, + source="manual", + access_token=token, + base_url=_provider_base_url(provider), + ) + pool.add_entry(entry) + print(f'Added {provider} credential #{len(pool.entries())}: "{label}"') + return + + if provider == "anthropic": + from agent import anthropic_adapter as anthropic_mod + + creds = anthropic_mod.run_hermes_oauth_login_pure() + if not creds: + raise SystemExit("Anthropic OAuth login did not return credentials.") + label = (getattr(args, "label", None) or "").strip() or _derive_label( + creds["access_token"], + _oauth_default_label(provider, len(pool.entries()) + 1), + ) + entry = PooledCredential( + provider=provider, + id=uuid.uuid4().hex[:6], + label=label, + auth_type="oauth", + priority=0, + source="manual:hermes_pkce", + access_token=creds["access_token"], + refresh_token=creds.get("refresh_token"), + 
expires_at_ms=creds.get("expires_at_ms"), + base_url=_provider_base_url(provider), + ) + pool.add_entry(entry) + print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') + return + + if provider == "nous": + creds = auth_mod._nous_device_code_login( + portal_base_url=getattr(args, "portal_url", None), + inference_base_url=getattr(args, "inference_url", None), + client_id=getattr(args, "client_id", None), + scope=getattr(args, "scope", None), + open_browser=not getattr(args, "no_browser", False), + timeout_seconds=getattr(args, "timeout", None) or 15.0, + insecure=bool(getattr(args, "insecure", False)), + ca_bundle=getattr(args, "ca_bundle", None), + min_key_ttl_seconds=max(60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))), + ) + label = (getattr(args, "label", None) or "").strip() or _derive_label( + creds.get("access_token", ""), + _oauth_default_label(provider, len(pool.entries()) + 1), + ) + entry = PooledCredential( + provider=provider, + id=uuid.uuid4().hex[:6], + label=label, + auth_type="oauth", + priority=0, + source="manual:device_code", + access_token=creds["access_token"], + refresh_token=creds.get("refresh_token"), + expires_at=creds.get("expires_at"), + token_type=creds.get("token_type"), + scope=creds.get("scope"), + client_id=creds.get("client_id"), + portal_base_url=creds.get("portal_base_url"), + inference_base_url=creds.get("inference_base_url"), + obtained_at=creds.get("obtained_at"), + expires_in=creds.get("expires_in"), + agent_key=creds.get("agent_key"), + agent_key_id=creds.get("agent_key_id"), + agent_key_expires_at=creds.get("agent_key_expires_at"), + agent_key_expires_in=creds.get("agent_key_expires_in"), + agent_key_reused=creds.get("agent_key_reused"), + agent_key_obtained_at=creds.get("agent_key_obtained_at"), + tls=creds.get("tls"), + base_url=creds.get("inference_base_url"), + ) + pool.add_entry(entry) + print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') + return + + if 
provider == "openai-codex": + creds = auth_mod._codex_device_code_login() + label = (getattr(args, "label", None) or "").strip() or _derive_label( + creds["tokens"]["access_token"], + _oauth_default_label(provider, len(pool.entries()) + 1), + ) + entry = PooledCredential( + provider=provider, + id=uuid.uuid4().hex[:6], + label=label, + auth_type="oauth", + priority=0, + source="manual:device_code", + access_token=creds["tokens"]["access_token"], + refresh_token=creds["tokens"].get("refresh_token"), + base_url=creds.get("base_url"), + last_refresh=creds.get("last_refresh"), + ) + pool.add_entry(entry) + print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') + return + + raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.") + + +def auth_list_command(args) -> None: + provider_filter = _normalize_provider(getattr(args, "provider", "") or "") + providers = [provider_filter] if provider_filter else sorted({ + *PROVIDER_REGISTRY.keys(), + "openrouter", + }) + for provider in providers: + pool = load_pool(provider) + entries = pool.entries() + if not entries: + continue + current = pool.select() + print(f"{provider} ({len(entries)} credentials):") + for idx, entry in enumerate(entries, start=1): + marker = " " + if current is not None and entry.id == current.id: + marker = "← " + status = "" + if entry.last_status == "exhausted": + status = f" exhausted ({entry.last_error_code})" + source = _display_source(entry.source) + print(f" #{idx} {entry.label:<20} {entry.auth_type:<7} {source}{status} {marker}".rstrip()) + print() + + +def auth_remove_command(args) -> None: + provider = _normalize_provider(getattr(args, "provider", "")) + index = int(getattr(args, "index")) + pool = load_pool(provider) + removed = pool.remove_index(index) + if removed is None: + raise SystemExit(f"No credential #{index} for provider {provider}.") + print(f"Removed {provider} credential #{index} ({removed.label})") + + 
+def auth_reset_command(args) -> None: + provider = _normalize_provider(getattr(args, "provider", "")) + pool = load_pool(provider) + count = pool.reset_statuses() + print(f"Reset status on {count} {provider} credentials") + + +def auth_command(args) -> None: + action = getattr(args, "auth_action", "") + if action == "add": + auth_add_command(args) + return + if action == "list": + auth_list_command(args) + return + if action == "remove": + auth_remove_command(args) + return + if action == "reset": + auth_reset_command(args) + return + raise SystemExit("Usage: hermes auth [add|list|remove|reset] ...") diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 9a2989484f2..0f7caa05654 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2310,6 +2310,12 @@ def cmd_logout(args): logout_command(args) +def cmd_auth(args): + """Manage pooled credentials.""" + from hermes_cli.auth_commands import auth_command + auth_command(args) + + def cmd_status(args): """Show status of all components.""" from hermes_cli.status import show_status @@ -2980,7 +2986,7 @@ def _coalesce_session_name_args(argv: list) -> list: or a known top-level subcommand. """ _SUBCOMMANDS = { - "chat", "model", "gateway", "setup", "whatsapp", "login", "logout", + "chat", "model", "gateway", "setup", "whatsapp", "login", "logout", "auth", "status", "cron", "doctor", "config", "pairing", "skills", "tools", "mcp", "sessions", "insights", "version", "update", "uninstall", } @@ -3021,6 +3027,10 @@ Examples: hermes --resume Resume a specific session by ID hermes setup Run setup wizard hermes logout Clear stored authentication + hermes auth add Add a pooled credential + hermes auth list List pooled credentials + hermes auth remove

Remove pooled credential by index + hermes auth reset Clear exhaustion status for a provider hermes model Select default model hermes config View configuration hermes config edit Edit config in $EDITOR @@ -3334,6 +3344,33 @@ For more help on a command: ) logout_parser.set_defaults(func=cmd_logout) + auth_parser = subparsers.add_parser( + "auth", + help="Manage pooled provider credentials", + ) + auth_subparsers = auth_parser.add_subparsers(dest="auth_action") + auth_add = auth_subparsers.add_parser("add", help="Add a pooled credential") + auth_add.add_argument("provider", help="Provider id (for example: anthropic, openai-codex, openrouter)") + auth_add.add_argument("--type", dest="auth_type", choices=["oauth", "api-key", "api_key"], help="Credential type to add") + auth_add.add_argument("--label", help="Optional display label") + auth_add.add_argument("--api-key", help="API key value (otherwise prompted securely)") + auth_add.add_argument("--portal-url", help="Nous portal base URL") + auth_add.add_argument("--inference-url", help="Nous inference base URL") + auth_add.add_argument("--client-id", help="OAuth client id") + auth_add.add_argument("--scope", help="OAuth scope override") + auth_add.add_argument("--no-browser", action="store_true", help="Do not auto-open a browser for OAuth login") + auth_add.add_argument("--timeout", type=float, help="OAuth/network timeout in seconds") + auth_add.add_argument("--insecure", action="store_true", help="Disable TLS verification for OAuth login") + auth_add.add_argument("--ca-bundle", help="Custom CA bundle for OAuth login") + auth_list = auth_subparsers.add_parser("list", help="List pooled credentials") + auth_list.add_argument("provider", nargs="?", help="Optional provider filter") + auth_remove = auth_subparsers.add_parser("remove", help="Remove a pooled credential by index") + auth_remove.add_argument("provider", help="Provider id") + auth_remove.add_argument("index", type=int, help="1-based credential index") + auth_reset 
= auth_subparsers.add_parser("reset", help="Clear exhaustion status for all credentials for a provider") + auth_reset.add_argument("provider", help="Provider id") + auth_parser.set_defaults(func=cmd_auth) + # ========================================================================= # status command # ========================================================================= diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 0c20c6768d3..3e3f2dd007c 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -6,8 +6,10 @@ import os from typing import Any, Dict, Optional from hermes_cli import auth as auth_mod +from agent.credential_pool import load_pool from hermes_cli.auth import ( AuthError, + DEFAULT_CODEX_BASE_URL, PROVIDER_REGISTRY, format_auth_error, resolve_provider, @@ -106,6 +108,48 @@ def _parse_api_mode(raw: Any) -> Optional[str]: return None +def _resolve_runtime_from_pool_entry( + *, + provider: str, + entry: Any, + requested_provider: str, + model_cfg: Optional[Dict[str, Any]] = None, + pool: Any = None, +) -> Dict[str, Any]: + model_cfg = model_cfg or _get_model_config() + base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/") + api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "") + api_mode = "chat_completions" + if provider == "openai-codex": + api_mode = "codex_responses" + base_url = base_url or DEFAULT_CODEX_BASE_URL + elif provider == "anthropic": + api_mode = "anthropic_messages" + base_url = base_url or "https://api.anthropic.com" + elif provider == "nous": + api_mode = "chat_completions" + elif provider == "copilot": + api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", "")) + else: + configured_mode = _parse_api_mode(model_cfg.get("api_mode")) + if configured_mode: + api_mode = configured_mode + elif base_url.rstrip("/").endswith("/anthropic") or provider in ("minimax", 
"minimax-cn"): + api_mode = "anthropic_messages" + if base_url.rstrip("/").endswith("/v1"): + base_url = base_url.rstrip("/")[:-3] + "/anthropic" + + return { + "provider": provider, + "api_mode": api_mode, + "base_url": base_url, + "api_key": api_key, + "source": getattr(entry, "source", "pool"), + "credential_pool": pool, + "requested_provider": requested_provider, + } + + def resolve_requested_provider(requested: Optional[str] = None) -> str: """Resolve provider request from explicit arg, config, then env.""" if requested and requested.strip(): @@ -313,6 +357,38 @@ def resolve_runtime_provider( explicit_api_key=explicit_api_key, explicit_base_url=explicit_base_url, ) + model_cfg = _get_model_config() + + should_use_pool = provider != "openrouter" + if provider == "openrouter": + cfg_provider = str(model_cfg.get("provider") or "").strip().lower() + cfg_base_url = str(model_cfg.get("base_url") or "").strip() + env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip() + has_custom_endpoint = bool(explicit_base_url or env_openai_base_url) + if cfg_base_url and cfg_provider in {"auto", "custom"}: + has_custom_endpoint = True + should_use_pool = requested_provider == "openrouter" and not has_custom_endpoint + + try: + pool = load_pool(provider) if should_use_pool else None + except Exception: + pool = None + if pool and pool.has_credentials(): + entry = pool.select() + pool_api_key = "" + if entry is not None: + pool_api_key = ( + getattr(entry, "runtime_api_key", None) + or getattr(entry, "access_token", "") + ) + if entry is not None and pool_api_key: + return _resolve_runtime_from_pool_entry( + provider=provider, + entry=entry, + requested_provider=requested_provider, + model_cfg=model_cfg, + pool=pool, + ) if provider == "nous": creds = resolve_nous_runtime_credentials( @@ -385,7 +461,6 @@ def resolve_runtime_provider( pconfig = PROVIDER_REGISTRY.get(provider) if pconfig and pconfig.auth_type == "api_key": creds = 
resolve_api_key_provider_credentials(provider) - model_cfg = _get_model_config() base_url = creds.get("base_url", "").rstrip("/") api_mode = "chat_completions" if provider == "copilot": diff --git a/run_agent.py b/run_agent.py index 7c8d9208b13..2e33f2defcb 100644 --- a/run_agent.py +++ b/run_agent.py @@ -418,6 +418,7 @@ class AIAgent: honcho_config=None, iteration_budget: "IterationBudget" = None, fallback_model: Dict[str, Any] = None, + credential_pool=None, checkpoints_enabled: bool = False, checkpoint_max_snapshots: int = 50, pass_session_id: bool = False, @@ -485,6 +486,7 @@ class AIAgent: self._print_fn = None self.skip_context_files = skip_context_files self.pass_session_id = pass_session_id + self._credential_pool = credential_pool self.log_prefix_chars = log_prefix_chars self.log_prefix = f"{log_prefix} " if log_prefix else "" # Store effective base URL for feature detection (prompt caching, reasoning, etc.) @@ -3420,6 +3422,84 @@ class AIAgent: self._is_anthropic_oauth = _is_oauth_token(new_token) return True + def _apply_client_headers_for_base_url(self, base_url: str) -> None: + normalized = (base_url or "").lower() + if "openrouter" in normalized: + self._client_kwargs["default_headers"] = { + "HTTP-Referer": "https://hermes-agent.nousresearch.com", + "X-OpenRouter-Title": "Hermes Agent", + "X-OpenRouter-Categories": "productivity,cli-agent", + } + elif "api.githubcopilot.com" in normalized: + from hermes_cli.models import copilot_default_headers + + self._client_kwargs["default_headers"] = copilot_default_headers() + elif "api.kimi.com" in normalized: + self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.3"} + else: + self._client_kwargs.pop("default_headers", None) + + def _swap_credential(self, entry) -> None: + runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "") + runtime_base = getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or self.base_url + + if self.api_mode == 
"anthropic_messages": + from agent.anthropic_adapter import build_anthropic_client, _is_oauth_token + + try: + self._anthropic_client.close() + except Exception: + pass + + self._anthropic_api_key = runtime_key + self._anthropic_base_url = runtime_base + self._anthropic_client = build_anthropic_client(runtime_key, runtime_base) + self._is_anthropic_oauth = _is_oauth_token(runtime_key) if self.provider == "anthropic" else False + self.api_key = runtime_key + self.base_url = runtime_base + return + + self.api_key = runtime_key + self.base_url = runtime_base.rstrip("/") if isinstance(runtime_base, str) else runtime_base + self._client_kwargs["api_key"] = self.api_key + self._client_kwargs["base_url"] = self.base_url + self._apply_client_headers_for_base_url(self.base_url) + self._replace_primary_openai_client(reason="credential_rotation") + + def _recover_with_credential_pool( + self, + *, + status_code: Optional[int], + retry_429_with_same_cred: bool, + ) -> tuple[bool, bool]: + pool = getattr(self, "_credential_pool", None) + if pool is None or status_code is None: + return False, retry_429_with_same_cred + + if status_code == 402: + next_entry = pool.mark_exhausted_and_rotate(status_code=402) + if next_entry is not None: + self._swap_credential(next_entry) + return True, False + return False, retry_429_with_same_cred + + if status_code == 429: + if not retry_429_with_same_cred: + return False, True + next_entry = pool.mark_exhausted_and_rotate(status_code=429) + if next_entry is not None: + self._swap_credential(next_entry) + return True, False + return False, True + + if status_code == 401: + refreshed = pool.try_refresh_current() + if refreshed is not None: + self._swap_credential(refreshed) + return True, retry_429_with_same_cred + + return False, retry_429_with_same_cred + def _anthropic_messages_create(self, api_kwargs: dict): if self.api_mode == "anthropic_messages": self._try_refresh_anthropic_client_credentials() @@ -5724,6 +5804,7 @@ class AIAgent: 
codex_auth_retry_attempted = False anthropic_auth_retry_attempted = False nous_auth_retry_attempted = False + retry_429_with_same_cred = False restart_with_compressed_messages = False restart_with_length_continuation = False @@ -6101,6 +6182,12 @@ class AIAgent: self.thinking_callback("") status_code = getattr(api_error, "status_code", None) + recovered_with_pool, retry_429_with_same_cred = self._recover_with_credential_pool( + status_code=status_code, + retry_429_with_same_cred=retry_429_with_same_cred, + ) + if recovered_with_pool: + continue if ( self.api_mode == "codex_responses" and self.provider == "openai-codex" diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index e4c770f8ea6..28958643805 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -206,6 +206,31 @@ class TestAnthropicOAuthFlag: adapter = client.chat.completions assert adapter._is_oauth is False + def test_pool_entry_takes_priority_over_legacy_resolution(self): + class _Entry: + access_token = "sk-ant-oat01-pooled" + base_url = "https://api.anthropic.com" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + with ( + patch("agent.auxiliary_client.load_pool", return_value=_Pool()), + patch("agent.anthropic_adapter.resolve_anthropic_token", side_effect=AssertionError("legacy path should not run")), + patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()) as mock_build, + ): + from agent.auxiliary_client import _try_anthropic + + client, model = _try_anthropic() + + assert client is not None + assert model == "claude-haiku-4-5-20251001" + assert mock_build.call_args.args[0] == "sk-ant-oat01-pooled" + class TestExpiredCodexFallback: """Test that expired Codex tokens don't block the auto chain.""" @@ -533,6 +558,32 @@ class TestGetTextAuxiliaryClient: from agent.auxiliary_client import CodexAuxiliaryClient assert isinstance(client, 
CodexAuxiliaryClient) + def test_codex_pool_entry_takes_priority_over_auth_store(self): + class _Entry: + access_token = "pooled-codex-token" + base_url = "https://chatgpt.com/backend-api/codex" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + with ( + patch("agent.auxiliary_client.load_pool", return_value=_Pool()), + patch("agent.auxiliary_client.OpenAI"), + patch("hermes_cli.auth._read_codex_tokens", side_effect=AssertionError("legacy codex store should not run")), + ): + from agent.auxiliary_client import _try_codex + + client, model = _try_codex() + + from agent.auxiliary_client import CodexAuxiliaryClient + + assert isinstance(client, CodexAuxiliaryClient) + assert model == "gpt-5.2-codex" + def test_returns_none_when_nothing_available(self, monkeypatch): monkeypatch.delenv("OPENAI_BASE_URL", raising=False) monkeypatch.delenv("OPENAI_API_KEY", raising=False) @@ -581,6 +632,35 @@ class TestVisionClientFallback: assert client.__class__.__name__ == "AnthropicAuxiliaryClient" assert model == "claude-haiku-4-5-20251001" + +class TestAuxiliaryPoolAwareness: + def test_try_nous_uses_pool_entry(self): + class _Entry: + access_token = "pooled-access-token" + agent_key = "pooled-agent-key" + inference_base_url = "https://inference.pool.example/v1" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + with ( + patch("agent.auxiliary_client.load_pool", return_value=_Pool()), + patch("agent.auxiliary_client.OpenAI") as mock_openai, + ): + from agent.auxiliary_client import _try_nous + + client, model = _try_nous() + + assert client is not None + assert model == "gemini-3-flash" + call_kwargs = mock_openai.call_args.kwargs + assert call_kwargs["api_key"] == "pooled-agent-key" + assert call_kwargs["base_url"] == "https://inference.pool.example/v1" + def test_resolve_provider_client_copilot_uses_runtime_credentials(self, monkeypatch): monkeypatch.delenv("GITHUB_TOKEN", 
raising=False) monkeypatch.delenv("GH_TOKEN", raising=False) diff --git a/tests/test_auth_commands.py b/tests/test_auth_commands.py new file mode 100644 index 00000000000..28b8789254f --- /dev/null +++ b/tests/test_auth_commands.py @@ -0,0 +1,268 @@ +"""Tests for auth subcommands backed by the credential pool.""" + +from __future__ import annotations + +import base64 +import json + +import pytest + + +def _write_auth_store(tmp_path, payload: dict) -> None: + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps(payload, indent=2)) + + +def _jwt_with_email(email: str) -> str: + header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode() + payload = base64.urlsafe_b64encode( + json.dumps({"email": email}).encode() + ).rstrip(b"=").decode() + return f"{header}.{payload}.signature" + + +@pytest.fixture(autouse=True) +def _clear_provider_env(monkeypatch): + for key in ( + "OPENROUTER_API_KEY", + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", + "CLAUDE_CODE_OAUTH_TOKEN", + ): + monkeypatch.delenv(key, raising=False) + + +def test_auth_add_api_key_persists_manual_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + _write_auth_store(tmp_path, {"version": 1, "providers": {}}) + + from hermes_cli.auth_commands import auth_add_command + + class _Args: + provider = "openrouter" + auth_type = "api-key" + api_key = "sk-or-manual" + label = "personal" + + auth_add_command(_Args()) + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entries = payload["credential_pool"]["openrouter"] + entry = next(item for item in entries if item["source"] == "manual") + assert entry["label"] == "personal" + assert entry["auth_type"] == "api_key" + assert entry["source"] == "manual" + assert 
entry["access_token"] == "sk-or-manual" + + +def test_auth_add_anthropic_oauth_persists_pool_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False) + monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) + _write_auth_store(tmp_path, {"version": 1, "providers": {}}) + token = _jwt_with_email("claude@example.com") + monkeypatch.setattr( + "agent.anthropic_adapter.run_hermes_oauth_login_pure", + lambda: { + "access_token": token, + "refresh_token": "refresh-token", + "expires_at_ms": 1711234567000, + }, + ) + + from hermes_cli.auth_commands import auth_add_command + + class _Args: + provider = "anthropic" + auth_type = "oauth" + api_key = None + label = None + + auth_add_command(_Args()) + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entries = payload["credential_pool"]["anthropic"] + entry = next(item for item in entries if item["source"] == "manual:hermes_pkce") + assert entry["label"] == "claude@example.com" + assert entry["source"] == "manual:hermes_pkce" + assert entry["refresh_token"] == "refresh-token" + assert entry["expires_at_ms"] == 1711234567000 + + +def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store(tmp_path, {"version": 1, "providers": {}}) + token = _jwt_with_email("nous@example.com") + monkeypatch.setattr( + "hermes_cli.auth._nous_device_code_login", + lambda **kwargs: { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "scope": "inference:mint_agent_key", + "token_type": "Bearer", + "access_token": token, + "refresh_token": "refresh-token", + "obtained_at": "2026-03-23T10:00:00+00:00", + "expires_at": "2026-03-23T11:00:00+00:00", + "expires_in": 3600, + "agent_key": "ak-test", 
+ "agent_key_id": "ak-id", + "agent_key_expires_at": "2026-03-23T10:30:00+00:00", + "agent_key_expires_in": 1800, + "agent_key_reused": False, + "agent_key_obtained_at": "2026-03-23T10:00:10+00:00", + "tls": {"insecure": False, "ca_bundle": None}, + }, + ) + + from hermes_cli.auth_commands import auth_add_command + + class _Args: + provider = "nous" + auth_type = "oauth" + api_key = None + label = None + portal_url = None + inference_url = None + client_id = None + scope = None + no_browser = False + timeout = None + insecure = False + ca_bundle = None + + auth_add_command(_Args()) + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entries = payload["credential_pool"]["nous"] + entry = next(item for item in entries if item["source"] == "manual:device_code") + assert entry["label"] == "nous@example.com" + assert entry["source"] == "manual:device_code" + assert entry["agent_key"] == "ak-test" + assert entry["portal_base_url"] == "https://portal.example.com" + + +def test_auth_add_codex_oauth_persists_pool_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store(tmp_path, {"version": 1, "providers": {}}) + token = _jwt_with_email("codex@example.com") + monkeypatch.setattr( + "hermes_cli.auth._codex_device_code_login", + lambda: { + "tokens": { + "access_token": token, + "refresh_token": "refresh-token", + }, + "base_url": "https://chatgpt.com/backend-api/codex", + "last_refresh": "2026-03-23T10:00:00Z", + }, + ) + + from hermes_cli.auth_commands import auth_add_command + + class _Args: + provider = "openai-codex" + auth_type = "oauth" + api_key = None + label = None + + auth_add_command(_Args()) + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entries = payload["credential_pool"]["openai-codex"] + entry = next(item for item in entries if item["source"] == "manual:device_code") + assert entry["label"] == "codex@example.com" + assert entry["source"] == 
"manual:device_code" + assert entry["refresh_token"] == "refresh-token" + assert entry["base_url"] == "https://chatgpt.com/backend-api/codex" + + +def test_auth_remove_reindexes_priorities(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-ant-api-primary", + }, + { + "id": "cred-2", + "label": "secondary", + "auth_type": "api_key", + "priority": 1, + "source": "manual", + "access_token": "sk-ant-api-secondary", + }, + ] + }, + }, + ) + + from hermes_cli.auth_commands import auth_remove_command + + class _Args: + provider = "anthropic" + index = 1 + + auth_remove_command(_Args()) + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entries = payload["credential_pool"]["anthropic"] + assert len(entries) == 1 + assert entries[0]["label"] == "secondary" + assert entries[0]["priority"] == 0 + + +def test_auth_reset_clears_provider_statuses(tmp_path, monkeypatch, capsys): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-ant-api-primary", + "last_status": "exhausted", + "last_status_at": 1711230000.0, + "last_error_code": 402, + } + ] + }, + }, + ) + + from hermes_cli.auth_commands import auth_reset_command + + class _Args: + provider = "anthropic" + + auth_reset_command(_Args()) + + out = capsys.readouterr().out + assert "Reset status" in out + + payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + entry = payload["credential_pool"]["anthropic"][0] + assert entry["last_status"] is None + assert entry["last_status_at"] is None + assert 
entry["last_error_code"] is None diff --git a/tests/test_credential_pool.py b/tests/test_credential_pool.py new file mode 100644 index 00000000000..5d6668101df --- /dev/null +++ b/tests/test_credential_pool.py @@ -0,0 +1,297 @@ +"""Tests for multi-credential runtime pooling and rotation.""" + +from __future__ import annotations + +import json +import time + +import pytest + + +def _write_auth_store(tmp_path, payload: dict) -> None: + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text(json.dumps(payload, indent=2)) + + +def test_fill_first_selection_skips_recently_exhausted_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-ant-api-primary", + "last_status": "exhausted", + "last_status_at": time.time(), + "last_error_code": 402, + }, + { + "id": "cred-2", + "label": "secondary", + "auth_type": "api_key", + "priority": 1, + "source": "manual", + "access_token": "sk-ant-api-secondary", + "last_status": "ok", + "last_status_at": None, + "last_error_code": None, + }, + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("anthropic") + entry = pool.select() + + assert entry is not None + assert entry.id == "cred-2" + assert pool.current().id == "cred-2" + + +def test_exhausted_entry_resets_after_ttl(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "openrouter": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-or-primary", + "base_url": "https://openrouter.ai/api/v1", + "last_status": "exhausted", + 
"last_status_at": time.time() - 90000, + "last_error_code": 429, + } + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + entry = pool.select() + + assert entry is not None + assert entry.id == "cred-1" + assert entry.last_status == "ok" + + +def test_mark_exhausted_and_rotate_persists_status(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "sk-ant-api-primary", + }, + { + "id": "cred-2", + "label": "secondary", + "auth_type": "api_key", + "priority": 1, + "source": "manual", + "access_token": "sk-ant-api-secondary", + }, + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("anthropic") + assert pool.select().id == "cred-1" + + next_entry = pool.mark_exhausted_and_rotate(status_code=402) + + assert next_entry is not None + assert next_entry.id == "cred-2" + + auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + persisted = auth_payload["credential_pool"]["anthropic"][0] + assert persisted["last_status"] == "exhausted" + assert persisted["last_error_code"] == 402 + + +def test_try_refresh_current_updates_only_current_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "openai-codex": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "oauth", + "priority": 0, + "source": "device_code", + "access_token": "access-old", + "refresh_token": "refresh-old", + "base_url": "https://chatgpt.com/backend-api/codex", + }, + { + "id": "cred-2", + "label": "secondary", + "auth_type": "oauth", + "priority": 1, + "source": "device_code", + "access_token": "access-other", + "refresh_token": 
"refresh-other", + "base_url": "https://chatgpt.com/backend-api/codex", + }, + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + monkeypatch.setattr( + "hermes_cli.auth.refresh_codex_oauth_pure", + lambda access_token, refresh_token, timeout_seconds=20.0: { + "access_token": "access-new", + "refresh_token": "refresh-new", + }, + ) + + pool = load_pool("openai-codex") + current = pool.select() + assert current.id == "cred-1" + + refreshed = pool.try_refresh_current() + + assert refreshed is not None + assert refreshed.access_token == "access-new" + + auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + primary, secondary = auth_payload["credential_pool"]["openai-codex"] + assert primary["access_token"] == "access-new" + assert primary["refresh_token"] == "refresh-new" + assert secondary["access_token"] == "access-other" + assert secondary["refresh_token"] == "refresh-other" + + +def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-seeded") + _write_auth_store(tmp_path, {"version": 1, "providers": {}}) + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + entry = pool.select() + + assert entry is not None + assert entry.source == "env:OPENROUTER_API_KEY" + assert entry.access_token == "sk-or-seeded" + + +def test_load_pool_migrates_nous_provider_state(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-token", + "refresh_token": "refresh-token", + "expires_at": "2026-03-24T12:00:00+00:00", + "agent_key": 
"agent-key", + "agent_key_expires_at": "2026-03-24T13:30:00+00:00", + } + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("nous") + entry = pool.select() + + assert entry is not None + assert entry.source == "device_code" + assert entry.portal_base_url == "https://portal.example.com" + assert entry.agent_key == "agent-key" + + +def test_singleton_seed_does_not_clobber_manual_oauth_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False) + monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "anthropic": [ + { + "id": "manual-1", + "label": "manual-pkce", + "auth_type": "oauth", + "priority": 0, + "source": "manual:hermes_pkce", + "access_token": "manual-token", + "refresh_token": "manual-refresh", + "expires_at_ms": 1711234567000, + } + ] + }, + }, + ) + + monkeypatch.setattr( + "agent.anthropic_adapter.read_hermes_oauth_credentials", + lambda: { + "accessToken": "seeded-token", + "refreshToken": "seeded-refresh", + "expiresAt": 1711234999000, + }, + ) + monkeypatch.setattr( + "agent.anthropic_adapter.read_claude_code_credentials", + lambda: None, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("anthropic") + entries = pool.entries() + + assert len(entries) == 2 + assert {entry.source for entry in entries} == {"manual:hermes_pkce", "hermes_pkce"} diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 81e16b70276..912fc899c39 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1528,6 +1528,62 @@ class TestNousCredentialRefresh: assert isinstance(agent.client, _RebuiltClient) +class TestCredentialPoolRecovery: + def test_recover_with_pool_rotates_on_402(self, agent): + current = SimpleNamespace(label="primary") + next_entry = 
SimpleNamespace(label="secondary") + + class _Pool: + def current(self): + return current + + def mark_exhausted_and_rotate(self, *, status_code): + assert status_code == 402 + return next_entry + + agent._credential_pool = _Pool() + agent._swap_credential = MagicMock() + + recovered, retry_same = agent._recover_with_credential_pool( + status_code=402, + retry_429_with_same_cred=False, + ) + + assert recovered is True + assert retry_same is False + agent._swap_credential.assert_called_once_with(next_entry) + + def test_recover_with_pool_retries_first_429_then_rotates(self, agent): + next_entry = SimpleNamespace(label="secondary") + + class _Pool: + def current(self): + return SimpleNamespace(label="primary") + + def mark_exhausted_and_rotate(self, *, status_code): + assert status_code == 429 + return next_entry + + agent._credential_pool = _Pool() + agent._swap_credential = MagicMock() + + recovered, retry_same = agent._recover_with_credential_pool( + status_code=429, + retry_429_with_same_cred=False, + ) + assert recovered is False + assert retry_same is True + agent._swap_credential.assert_not_called() + + recovered, retry_same = agent._recover_with_credential_pool( + status_code=429, + retry_429_with_same_cred=True, + ) + assert recovered is True + assert retry_same is False + agent._swap_credential.assert_called_once_with(next_entry) + + class TestMaxTokensParam: """Verify _max_tokens_param returns the correct key for each provider.""" diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py index c478bc23cb4..648e3c8e321 100644 --- a/tests/test_runtime_provider_resolution.py +++ b/tests/test_runtime_provider_resolution.py @@ -1,6 +1,55 @@ from hermes_cli import runtime_provider as rp +def test_resolve_runtime_provider_uses_credential_pool(monkeypatch): + class _Entry: + access_token = "pool-token" + source = "manual" + base_url = "https://chatgpt.com/backend-api/codex" + + class _Pool: + def has_credentials(self): + 
return True + + def select(self): + return _Entry() + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex") + monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool()) + + resolved = rp.resolve_runtime_provider(requested="openai-codex") + + assert resolved["provider"] == "openai-codex" + assert resolved["api_key"] == "pool-token" + assert resolved["credential_pool"] is not None + assert resolved["source"] == "manual" + + +def test_resolve_runtime_provider_falls_back_when_pool_empty(monkeypatch): + class _Pool: + def has_credentials(self): + return False + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex") + monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool()) + monkeypatch.setattr( + rp, + "resolve_codex_runtime_credentials", + lambda: { + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "codex-token", + "source": "hermes-auth-store", + "last_refresh": "2026-02-26T00:00:00Z", + }, + ) + + resolved = rp.resolve_runtime_provider(requested="openai-codex") + + assert resolved["api_key"] == "codex-token" + assert resolved.get("credential_pool") is None + + def test_resolve_runtime_provider_codex(monkeypatch): monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex") monkeypatch.setattr( diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 1a779f8a0bb..d86a8c4889d 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -593,7 +593,14 @@ class TestDelegationCredentialResolution(unittest.TestCase): "model": "qwen2.5-coder", "base_url": "http://localhost:1234/v1", } - with patch.dict(os.environ, {"OPENROUTER_API_KEY": "env-openrouter-key"}, clear=False): + with patch.dict( + os.environ, + { + "OPENROUTER_API_KEY": "env-openrouter-key", + "OPENAI_API_KEY": "", + }, + clear=False, + ): with self.assertRaises(ValueError) as ctx: _resolve_delegation_credentials(cfg, parent) 
self.assertIn("OPENAI_API_KEY", str(ctx.exception)) diff --git a/tests/tools/test_transcription.py b/tests/tools/test_transcription.py index 0ce3f2468df..5f42272a546 100644 --- a/tests/tools/test_transcription.py +++ b/tests/tools/test_transcription.py @@ -18,6 +18,11 @@ import pytest # --------------------------------------------------------------------------- +@pytest.fixture(autouse=True) +def _clear_openai_env(monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + + class TestGetProvider: """_get_provider() picks the right backend based on config + availability."""