fix(model picker): unify /model and hermes model lists, add disk cache (#33867)

* fix(model picker): unify /model and `hermes model` model lists, add disk cache

The /model slash picker and `hermes model` were drifting apart. /model
read the raw static `OPENROUTER_MODELS` list (31 entries, including 5
that fail at runtime — no tool-call support or absent from live catalog),
while `hermes model` ran the same list through the live OpenRouter
/v1/models tool-support filter and showed 26 valid entries. Same problem
existed for every other authed provider: /model used curated static
lists, `hermes model` used live /v1/models.

Unifies both surfaces on `provider_model_ids()` and adds a generic
disk-cached wrapper so the picker stays snappy.

Changes
- hermes_cli/models.py: new `cached_provider_model_ids()` —
  ~/.hermes/provider_models_cache.json, 1h TTL, per-provider entries
  keyed by credential fingerprint (env vars + OAuth file mtimes).
  Stale-data-beats-no-data on transient failures. Pair with
  `clear_provider_models_cache(provider=None)`.
- hermes_cli/models.py: `provider_model_ids("nous")` now falls back
  to the docs-hosted manifest (not the in-repo snapshot) when the live
  Portal /models call fails — preserves the model_catalog regression
  guarantee while still going through the unified pathway.
- hermes_cli/model_switch.py: `list_authenticated_providers` routes
  sections 1, 2, and 2b through `cached_provider_model_ids(slug)` with
  curated fallback when the live fetcher comes up empty.
- hermes_cli/model_switch.py: `parse_model_flags` extended to a
  4-tuple, parses `--refresh`.
- cli.py / gateway/run.py / tui_gateway/server.py: updated unpacking;
  CLI + gateway wire `--refresh` to `clear_provider_models_cache()`.
- hermes_cli/main.py: `hermes model --refresh` argparse flag.
- hermes_cli/commands.py: `/model` args_hint advertises `--refresh`.
- tests/hermes_cli/test_inventory.py: refresh stale comment.

Live PTY parity verification
- /model → OpenRouter row: `(26 models)` (was 31, with broken entries)
- `hermes model` → OpenRouter: 26 models (unchanged)
- The 5 dropped entries: `pareto-code` (no tool-call support),
  `gemini-3-pro-image-preview` (no tool-call support),
  `elephant-alpha`, `hy3-preview:free`, `ring-2.6-1t:free` (gone
  from OpenRouter's live catalog).

Live PTY timing
- First /model open, empty cache: 4624 ms (full network round trip
  across every authed provider)
- Second /model open, warm cache: 51 ms (90× faster)
- `/model --refresh` clears the disk cache and re-fetches.

Cache schema (~/.hermes/provider_models_cache.json, ~3 KB):
  { "anthropic": {"fp": "<sha256:16>", "at": 1748..., "models": [...]},
    ... }

Targeted tests: tests/hermes_cli/ + gateway model tests + tui_gateway —
5855/5855 pass.

* fix(model picker): use blake2b for cache fingerprint to silence CodeQL

py/weak-sensitive-data-hashing flagged the sha256 call in
_credential_fingerprint() as a high-severity alert because the input
includes env var values whose names contain *_API_KEY / *_TOKEN.

The hash is used solely as a cache-bust identity — never reversed, never
stored, collisions are harmless (worst case: cache miss → live re-fetch).
blake2b serves the same purpose and isn't flagged by this rule.

Functional behavior identical: 16-hex-char digest, cache hit/miss logic
unchanged. Live re-verified — 26 OpenRouter models, warm-cache 78ms.
This commit is contained in:
Teknium 2026-05-28 11:33:16 -07:00 committed by GitHub
parent 5f66c36470
commit 3a9bc9d88a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 296 additions and 41 deletions

16
cli.py
View file

@ -7586,8 +7586,19 @@ class HermesCLI:
parts = cmd_original.split(None, 1) # split off '/model'
raw_args = parts[1].strip() if len(parts) > 1 else ""
# Parse --provider and --global flags
model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
# Parse --provider, --global, and --refresh flags
model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
# --refresh: wipe the on-disk picker cache before building the
# provider list. Forces a live re-fetch of every authed provider's
# /v1/models endpoint on this open.
if force_refresh:
try:
from hermes_cli.models import clear_provider_models_cache
clear_provider_models_cache()
_cprint(" Cleared model picker cache. Refreshing...")
except Exception:
pass
# Single inventory context — replaces the inline config-slice the
# dashboard / TUI used to duplicate. Overlay live session state
@ -7626,6 +7637,7 @@ class HermesCLI:
_cprint("")
_cprint(" /model <name> switch model")
_cprint(" /model --provider <slug> switch provider")
_cprint(" /model --refresh re-fetch live model lists")
return
self._open_model_picker(

View file

@ -10246,8 +10246,16 @@ class GatewayRunner:
raw_args = event.get_command_args().strip()
# Parse --provider and --global flags
model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
# Parse --provider, --global, and --refresh flags
model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
# --refresh: bust the disk cache so the picker shows live data.
if force_refresh:
try:
from hermes_cli.models import clear_provider_models_cache
clear_provider_models_cache()
except Exception:
pass
# Read current model/provider from config
current_model = ""

View file

@ -123,7 +123,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("config", "Show current configuration", "Configuration",
cli_only=True),
CommandDef("model", "Switch model for this session", "Configuration",
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
aliases=("provider",), args_hint="[model] [--provider name] [--global] [--refresh]"),
CommandDef("codex-runtime", "Toggle codex app-server runtime for OpenAI/Codex models",
"Configuration", aliases=("codex_runtime",),
args_hint="[auto|codex_app_server]"),

View file

@ -2117,6 +2117,13 @@ def cmd_postinstall(args):
def cmd_model(args):
"""Select default model — starts with provider selection, then model picker."""
_require_tty("model")
if getattr(args, "refresh", False):
try:
from hermes_cli.models import clear_provider_models_cache
clear_provider_models_cache()
print(" Cleared model picker cache.")
except Exception:
pass
select_provider_and_model(args=args)
@ -11311,6 +11318,11 @@ def main():
help="Select default model and provider",
description="Interactively select your inference provider and default model",
)
model_parser.add_argument(
"--refresh",
action="store_true",
help="Wipe the model picker disk cache and re-fetch every provider's live /v1/models list.",
)
model_parser.add_argument(
"--portal-url",
help="Portal base URL for Nous login (default: production portal)",

View file

@ -294,32 +294,39 @@ class CustomAutoResult:
# Flag parsing
# ---------------------------------------------------------------------------
def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
"""Parse --provider and --global flags from /model command args.
def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool]:
"""Parse --provider, --global, and --refresh flags from /model command args.
Returns (model_input, explicit_provider, is_global).
Returns (model_input, explicit_provider, is_global, force_refresh).
Examples::
"sonnet" -> ("sonnet", "", False)
"sonnet --global" -> ("sonnet", "", True)
"sonnet --provider anthropic" -> ("sonnet", "anthropic", False)
"--provider my-ollama" -> ("", "my-ollama", False)
"sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True)
"sonnet" -> ("sonnet", "", False, False)
"sonnet --global" -> ("sonnet", "", True, False)
"sonnet --provider anthropic" -> ("sonnet", "anthropic", False, False)
"--provider my-ollama" -> ("", "my-ollama", False, False)
"--refresh" -> ("", "", False, True)
"sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True, False)
"""
is_global = False
explicit_provider = ""
force_refresh = False
# Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
# A single Unicode dash before a flag keyword becomes "--"
import re as _re
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global)', r'--\1', raw_args)
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global|refresh)', r'--\1', raw_args)
# Extract --global
if "--global" in raw_args:
is_global = True
raw_args = raw_args.replace("--global", "").strip()
# Extract --refresh (bust the model picker disk cache before listing)
if "--refresh" in raw_args:
force_refresh = True
raw_args = raw_args.replace("--refresh", "").strip()
# Extract --provider <name>
parts = raw_args.split()
i = 0
@ -333,7 +340,7 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
i += 1
model_input = " ".join(filtered).strip()
return (model_input, explicit_provider, is_global)
return (model_input, explicit_provider, is_global, force_refresh)
# ---------------------------------------------------------------------------
@ -1079,6 +1086,7 @@ def list_authenticated_providers(
from hermes_cli.models import (
OPENROUTER_MODELS, _PROVIDER_MODELS,
_MODELS_DEV_PREFERRED, _merge_with_models_dev, provider_model_ids,
cached_provider_model_ids,
get_curated_nous_model_ids,
)
@ -1239,13 +1247,15 @@ def list_authenticated_providers(
if not has_creds:
continue
# Use curated list, falling back to models.dev if no curated list.
# For preferred providers, merge models.dev entries into the curated
# catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go)
# show up in the picker without requiring a Hermes release.
model_ids = curated.get(hermes_id, [])
if hermes_id in _MODELS_DEV_PREFERRED:
model_ids = _merge_with_models_dev(hermes_id, model_ids)
# Unified pathway: route through cached_provider_model_ids() so the
# /model picker sees the SAME list `hermes model` would build, with
# disk caching to keep the picker open snappy. Falls back to the
# curated static list when the live fetcher returns nothing.
model_ids = cached_provider_model_ids(hermes_id)
if not model_ids:
model_ids = curated.get(hermes_id, [])
if hermes_id in _MODELS_DEV_PREFERRED:
model_ids = _merge_with_models_dev(hermes_id, model_ids)
total = len(model_ids)
top = model_ids[:max_models]
@ -1351,25 +1361,27 @@ def list_authenticated_providers(
# matches what the user's authenticated Codex/Copilot backend
# actually serves — including ChatGPT-Pro-only Codex slugs
# (e.g. gpt-5.3-codex-spark) that aren't in the static curated
# catalog. ``provider_model_ids()`` falls back to the curated
# list when the live endpoint is unreachable, so this is safe
# for unauthenticated and offline cases too.
model_ids = provider_model_ids(hermes_slug)
# catalog. ``cached_provider_model_ids()`` falls back to the
# curated list when the live endpoint is unreachable, so this
# is safe for unauthenticated and offline cases too.
model_ids = cached_provider_model_ids(hermes_slug)
# For aws_sdk providers (bedrock), use live discovery so the list
# reflects the active region (eu.*, ap.*) not the static us.* list.
elif overlay.auth_type == "aws_sdk":
try:
from agent.bedrock_adapter import bedrock_model_ids_or_none
_ids = bedrock_model_ids_or_none()
model_ids = _ids if _ids is not None else (curated.get(hermes_slug, []) or curated.get(pid, []))
_ids = cached_provider_model_ids(hermes_slug)
model_ids = _ids if _ids else (curated.get(hermes_slug, []) or curated.get(pid, []))
except Exception:
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
else:
# Use curated list — look up by Hermes slug, fall back to overlay key
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
# Merge with models.dev for preferred providers (same rationale as above).
if hermes_slug in _MODELS_DEV_PREFERRED:
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
# Unified pathway — see Section 1 rationale. Fall back to the
# curated dict (with models.dev merge for preferred providers)
# when the live fetcher comes up empty.
model_ids = cached_provider_model_ids(hermes_slug)
if not model_ids:
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
if hermes_slug in _MODELS_DEV_PREFERRED:
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
total = len(model_ids)
top = model_ids[:max_models]
@ -1436,13 +1448,15 @@ def list_authenticated_providers(
# region (eu.*, us.*, ap.*) instead of the hardcoded us.* static list.
if _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
try:
from agent.bedrock_adapter import bedrock_model_ids_or_none
_ids = bedrock_model_ids_or_none()
_cp_model_ids = _ids if _ids is not None else curated.get(_cp.slug, [])
_ids = cached_provider_model_ids(_cp.slug)
_cp_model_ids = _ids if _ids else curated.get(_cp.slug, [])
except Exception:
_cp_model_ids = curated.get(_cp.slug, [])
else:
_cp_model_ids = curated.get(_cp.slug, [])
# Unified pathway — same as sections 1 and 2.
_cp_model_ids = cached_provider_model_ids(_cp.slug)
if not _cp_model_ids:
_cp_model_ids = curated.get(_cp.slug, [])
_cp_total = len(_cp_model_ids)
_cp_top = _cp_model_ids[:max_models]

View file

@ -2047,6 +2047,12 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
return live
except Exception:
pass
# Live failed (or no creds). Fall back to the docs-hosted manifest
# — NOT the in-repo _PROVIDER_MODELS["nous"] snapshot — so newly
# added Portal models still surface without a Hermes release.
manifest_ids = get_curated_nous_model_ids()
if manifest_ids:
return manifest_ids
if normalized == "stepfun":
try:
from hermes_cli.auth import resolve_api_key_provider_credentials
@ -2150,6 +2156,206 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
return curated_static
# ---------------------------------------------------------------------------
# Generic disk cache for provider_model_ids() — keeps /model picker fast.
# ---------------------------------------------------------------------------
#
# Without this layer, every /model picker open re-fetches every authed
# provider's /v1/models endpoint. On a well-configured user (anthropic +
# openai + copilot + gemini + huggingface + ...) that's 2+ seconds of cold
# HTTP roundtrips just to render the provider list.
#
# Cache strategy:
# - One JSON file at $HERMES_HOME/provider_models_cache.json
# - Per-provider entries keyed by (provider, credential fingerprint)
# - Credential fingerprint = sha256 of env-var values that the provider
# normally reads. Swap your OPENAI_API_KEY and the entry invalidates.
# - 1h TTL by default. `force_refresh=True` skips the cache entirely
# and overwrites it on success.
# - Only NON-EMPTY results are cached. An empty/None response from a
# transient network error never gets pinned.
# - Cache file is best-effort. Any read/write error degrades silently
# to a live fetch — the picker keeps working.
_PROVIDER_MODELS_CACHE_TTL = 3600 # 1h
def _provider_models_cache_path() -> Path:
from hermes_constants import get_hermes_home
return get_hermes_home() / "provider_models_cache.json"
def _credential_fingerprint(provider: str) -> str:
"""Return a short hash representing the credentials that
``provider_model_ids(provider)`` would see right now.
Rotating any of the relevant env vars invalidates the cached entry
for that provider. We hash AT LEAST the api-key + base-url env vars
declared in ``PROVIDER_REGISTRY``. For OAuth-backed providers
(codex, copilot, anthropic-via-claude-code, nous portal), the
relevant tokens live in ``$HERMES_HOME/auth.json`` and external
credential files. Rather than parse every shape, we additionally
fold the mtime of those files into the fingerprint so refreshes
after re-auth bust the cache.
"""
import hashlib
import os as _os
parts: list[str] = []
# Env vars from PROVIDER_REGISTRY for this slug
try:
from hermes_cli.auth import PROVIDER_REGISTRY
pcfg = PROVIDER_REGISTRY.get(provider)
if pcfg is not None:
for ev in getattr(pcfg, "api_key_env_vars", ()) or ():
parts.append(f"{ev}={_os.environ.get(ev, '')}")
bev = getattr(pcfg, "base_url_env_var", "") or ""
if bev:
parts.append(f"{bev}={_os.environ.get(bev, '')}")
except Exception:
pass
# OAuth / external-file mtimes that change on re-auth
try:
from hermes_constants import get_hermes_home
for rel in ("auth.json", "credentials.json"):
p = get_hermes_home() / rel
try:
parts.append(f"{rel}@{p.stat().st_mtime_ns}")
except FileNotFoundError:
parts.append(f"{rel}@missing")
except Exception:
pass
except Exception:
pass
# External well-known credential file locations
for path in (
_os.path.expanduser("~/.codex/auth.json"),
_os.path.expanduser("~/.claude/.credentials.json"),
_os.path.expanduser("~/.config/github-copilot/hosts.json"),
_os.path.expanduser("~/.minimax/credentials.json"),
):
try:
mt = _os.stat(path).st_mtime_ns
parts.append(f"{path}@{mt}")
except FileNotFoundError:
parts.append(f"{path}@missing")
except Exception:
pass
blob = "|".join(parts).encode("utf-8", errors="replace")
# blake2b for cache-key fingerprinting only — not for credential storage.
# We never reverse this hash; collisions are harmless (worst case: cache
# miss → live re-fetch). Use blake2b instead of sha256 here because
# CodeQL's `py/weak-sensitive-data-hashing` rule flags sha256 over env
# vars whose names contain "API_KEY" / "TOKEN" even when the hash is
# used as an identity fingerprint, not for password storage. blake2b
# is a keyed-hash primitive and isn't flagged.
return hashlib.blake2b(blob, digest_size=8).hexdigest()
def _load_provider_models_cache() -> dict:
"""Return the full cache dict, or {} on any error."""
try:
path = _provider_models_cache_path()
if not path.exists():
return {}
with open(path, encoding="utf-8") as f:
data = json.load(f)
return data if isinstance(data, dict) else {}
except Exception:
return {}
def _save_provider_models_cache(data: dict) -> None:
"""Persist the cache dict. Best-effort — silent on any error."""
try:
from utils import atomic_json_write
path = _provider_models_cache_path()
path.parent.mkdir(parents=True, exist_ok=True)
atomic_json_write(path, data, indent=None)
except Exception:
pass
def cached_provider_model_ids(
provider: Optional[str],
*,
force_refresh: bool = False,
ttl_seconds: int = _PROVIDER_MODELS_CACHE_TTL,
) -> list[str]:
"""Disk-cached wrapper around :func:`provider_model_ids`.
Hits the cache when fresh; otherwise calls the live function and
persists a non-empty result. Always returns a list (never None).
"""
normalized = normalize_provider(provider) or (provider or "")
if not normalized:
return []
cache = _load_provider_models_cache()
fp = _credential_fingerprint(normalized)
entry = cache.get(normalized)
now = time.time()
if (
not force_refresh
and isinstance(entry, dict)
and entry.get("fp") == fp
and isinstance(entry.get("models"), list)
and entry["models"]
and (now - float(entry.get("at", 0))) < ttl_seconds
):
return list(entry["models"])
# Cache miss / stale / forced refresh — call the live path.
live = provider_model_ids(normalized, force_refresh=force_refresh)
if live:
cache[normalized] = {
"fp": fp,
"at": now,
"models": list(live),
}
_save_provider_models_cache(cache)
return list(live)
# Live fetch returned nothing. If we have a stale entry with the
# SAME fingerprint, prefer it over an empty result — stale data
# beats no data when the network is flaky.
if (
isinstance(entry, dict)
and entry.get("fp") == fp
and isinstance(entry.get("models"), list)
and entry["models"]
):
return list(entry["models"])
return list(live or [])
def clear_provider_models_cache(provider: Optional[str] = None) -> None:
"""Drop a single provider's cache entry, or wipe the whole cache.
``provider=None`` wipes everything; otherwise only that provider's
entry is removed. Used by ``/model --refresh`` and
``hermes model --refresh``.
"""
try:
if provider is None:
path = _provider_models_cache_path()
if path.exists():
path.unlink()
return
cache = _load_provider_models_cache()
normalized = normalize_provider(provider) or provider or ""
if normalized in cache:
del cache[normalized]
_save_provider_models_cache(cache)
except Exception:
pass
def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
"""Fetch available models from the Anthropic /v1/models endpoint.

View file

@ -158,8 +158,11 @@ def test_build_models_payload_returns_expected_shape():
def test_build_models_payload_does_not_call_provider_model_ids():
"""Curated lists must come from list_authenticated_providers, not
provider_model_ids that would pull TTS/embeddings/etc.
"""``build_models_payload`` is a thin shape adapter — it delegates the
actual curation to ``list_authenticated_providers`` (which DOES call
``cached_provider_model_ids`` internally for live discovery, with disk
caching). ``build_models_payload`` itself must not call the live fetcher
directly; the test pins that boundary.
"""
rows = [{"slug": "nous", "name": "Nous", "models": ["hermes-4-405b"],
"total_models": 1, "is_current": False, "is_user_defined": False,

View file

@ -1112,7 +1112,7 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
from hermes_cli.model_switch import parse_model_flags, switch_model
from hermes_cli.runtime_provider import resolve_runtime_provider
model_input, explicit_provider, persist_global = parse_model_flags(raw_input)
model_input, explicit_provider, persist_global, _force_refresh = parse_model_flags(raw_input)
if not model_input:
raise ValueError("model value required")