"""
Canonical model catalogs and lightweight validation helpers.
Add, remove, or reorder entries here — both `hermes setup` and
`hermes` provider-selection will pick up the change automatically.
"""
from __future__ import annotations
import json
import os
import urllib.request
import urllib.error
import time
from difflib import get_close_matches
from pathlib import Path
from typing import Any, NamedTuple, Optional
from hermes_cli import __version__ as _HERMES_VERSION
# Identify ourselves so endpoints fronted by Cloudflare's Browser Integrity
# Check (error 1010) don't reject the default ``Python-urllib/*`` signature.
_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
COPILOT_BASE_URL = "https://api.githubcopilot.com"
COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
COPILOT_EDITOR_VERSION = "vscode/1.104.1"
COPILOT_REASONING_EFFORTS_GPT5 = ["minimal", "low", "medium", "high"]
COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
# Fallback OpenRouter snapshot used when the live catalog is unavailable.
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
# Anthropic
("anthropic/claude-opus-4.8", ""),
("anthropic/claude-opus-4.8-fast", "2x price, higher output speed"),
("anthropic/claude-sonnet-4.6", ""),
("anthropic/claude-haiku-4.5", ""),
# OpenAI
("openai/gpt-5.5", ""),
("openai/gpt-5.5-pro", ""),
("openai/gpt-5.4-mini", ""),
# Google
("google/gemini-3-pro-preview", ""),
("google/gemini-3.1-pro-preview", ""),
("google/gemini-3.5-flash", ""),
# xAI
("x-ai/grok-4.3", ""),
# DeepSeek
("deepseek/deepseek-v4-pro", ""),
("deepseek/deepseek-v4-flash", ""),
# Qwen
("qwen/qwen3.7-max", ""),
("qwen/qwen3.7-plus", ""),
("qwen/qwen3.6-35b-a3b", ""),
# MoonshotAI
("moonshotai/kimi-k2.6", "recommended"),
# MiniMax
("minimax/minimax-m3", ""),
# Z-AI
("z-ai/glm-5.1", ""),
# Xiaomi
("xiaomi/mimo-v2.5-pro", ""),
# Tencent
("tencent/hy3-preview", ""),
# StepFun
("stepfun/step-3.7-flash", ""),
# NVIDIA
("nvidia/nemotron-3-super-120b-a12b", ""),
# OpenRouter routers
("openrouter/pareto-code", "auto-routes to cheapest coder meeting openrouter.min_coding_score"),
# Free tier
("openrouter/elephant-alpha", "free"),
("openrouter/owl-alpha", "free"),
("poolside/laguna-m.1:free", "free"),
("tencent/hy3-preview:free", "free"),
("nvidia/nemotron-3-super-120b-a12b:free", "free"),
("nvidia/nemotron-3-ultra-550b-a55b:free", "free"),
("inclusionai/ring-2.6-1t:free", "free"),
]
_openrouter_catalog_cache: list[tuple[str, str]] | None = None
def _codex_curated_models() -> list[str]:
"""Derive the openai-codex curated list from codex_models.py.
Single source of truth: DEFAULT_CODEX_MODELS + forward-compat synthesis.
This keeps the gateway /model picker in sync with the CLI `hermes model`
flow without maintaining a separate static list.
"""
from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, _add_forward_compat_models
return _add_forward_compat_models(list(DEFAULT_CODEX_MODELS))
# Static fallback for xAI when the models.dev disk cache is empty (fresh
# install, offline first run, etc.). Mirrors the xAI-direct model IDs from
# $HERMES_HOME/models_dev_cache.json as of 2026-04-28. Whenever xAI renames
# or retires a model, the disk cache picks it up on the next refresh and the
# fallback here only matters until that refresh lands.
#
# Models retired by xAI on May 15, 2026 are excluded — see
# https://docs.x.ai/developers/migration/may-15-retirement
# (grok-4, grok-4-0709, grok-4-fast{,-reasoning,-non-reasoning},
# grok-4-1-fast{,-reasoning,-non-reasoning}, grok-code-fast-1 → grok-4.3).
_XAI_STATIC_FALLBACK: list[str] = [
"grok-4.3",
"grok-4.20-0309-reasoning",
"grok-4.20-0309-non-reasoning",
"grok-4.20-multi-agent-0309",
]
_XAI_TOP_MODEL = "grok-4.3"
def _xai_promote_top(ids: list[str]) -> list[str]:
"""Pin the headline xAI model to the top of the curated list."""
if _XAI_TOP_MODEL in ids:
return [_XAI_TOP_MODEL] + [m for m in ids if m != _XAI_TOP_MODEL]
return ids
def _xai_curated_models() -> list[str]:
"""Derive the xAI-direct curated list from models.dev disk cache.
Reads $HERMES_HOME/models_dev_cache.json directly (no network) so this
runs at import time without blocking. Falls back to ``_XAI_STATIC_FALLBACK``
when the cache is empty or unreadable. Hermes refreshes the cache from
https://models.dev/api.json on normal use, so this list self-heals as
xAI renames models.
Mirrors ``_codex_curated_models()``'s role for openai-codex.
"""
try:
from agent.models_dev import _load_disk_cache
data = _load_disk_cache()
xai = data.get("xai") if isinstance(data, dict) else None
models = xai.get("models") if isinstance(xai, dict) else None
if isinstance(models, dict) and models:
ids = [mid for mid in models.keys() if isinstance(mid, str)]
if ids:
return _xai_promote_top(sorted(ids))
except Exception:
# Any failure (missing file, malformed JSON, import error)
# falls through to the static list.
pass
return list(_XAI_STATIC_FALLBACK)
_PROVIDER_MODELS: dict[str, list[str]] = {
"nous": [
# Anthropic
"anthropic/claude-opus-4.8",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-haiku-4.5",
# OpenAI
"openai/gpt-5.5",
"openai/gpt-5.5-pro",
"openai/gpt-5.4-mini",
# Google
"google/gemini-3-pro-preview",
"google/gemini-3.1-pro-preview",
"google/gemini-3.5-flash",
# xAI
"x-ai/grok-4.3",
# DeepSeek
"deepseek/deepseek-v4-pro",
"deepseek/deepseek-v4-flash",
# Qwen
"qwen/qwen3.7-max",
"qwen/qwen3.7-plus",
"qwen/qwen3.6-35b-a3b",
# MoonshotAI
"moonshotai/kimi-k2.6",
# MiniMax
"minimax/minimax-m3",
# Z-AI
"z-ai/glm-5.1",
# Xiaomi
"xiaomi/mimo-v2.5-pro",
# Tencent
"tencent/hy3-preview",
# StepFun
"stepfun/step-3.7-flash",
# NVIDIA
"nvidia/nemotron-3-super-120b-a12b",
],
# Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
# provider_model_ids fallback when /v1/models is unavailable.
"openai": [
"gpt-5.4",
"gpt-5.4-mini",
"gpt-5-mini",
"gpt-5.3-codex",
"gpt-5.2-codex",
"gpt-4.1",
"gpt-4o",
"gpt-4o-mini",
],
"openai-api": [
"gpt-5.5",
"gpt-5.5-pro",
"gpt-5.4",
"gpt-5.4-mini",
"gpt-5.4-nano",
"gpt-5-mini",
"gpt-5.3-codex",
"gpt-4.1",
"gpt-4o",
"gpt-4o-mini",
],
"openai-codex": _codex_curated_models(),
"xai-oauth": _xai_curated_models(),
"copilot-acp": [
"copilot-acp",
],
"copilot": [
"gpt-5.4",
"gpt-5.4-mini",
"gpt-5-mini",
"gpt-5.3-codex",
"gpt-5.2-codex",
"gpt-4.1",
"gpt-4o",
"gpt-4o-mini",
"claude-sonnet-4.6",
"claude-sonnet-4",
"claude-sonnet-4.5",
"claude-haiku-4.5",
"gemini-3.1-pro-preview",
"gemini-3-pro-preview",
"gemini-3-flash-preview",
"gemini-2.5-pro",
],
"gemini": [
"gemini-3.1-pro-preview",
"gemini-3-pro-preview",
"gemini-3.5-flash",
"gemini-3.1-flash-lite-preview",
],
"google-gemini-cli": [
"gemini-3.1-pro-preview",
"gemini-3-pro-preview",
# Code Assist serves two flash slugs with different access gates
# (gemini-cli models.ts): gemini-3-flash-preview is the preview flash
# that subscription/free-tier OAuth users actually reach, while
# gemini-3.5-flash is GA-channel-gated. Offer both so non-GA users
# aren't stuck with a slug cloudcode-pa 404s for them.
"gemini-3-flash-preview",
"gemini-3.5-flash",
],
"zai": [
"glm-5.1",
"glm-5",
"glm-5v-turbo",
"glm-5-turbo",
"glm-4.7",
"glm-4.5",
"glm-4.5-flash",
],
"xai": _xai_curated_models(),
"nvidia": [
# NVIDIA flagship reasoning models
"nvidia/nemotron-3-super-120b-a12b",
"nvidia/nemotron-3-nano-30b-a3b",
"nvidia/llama-3.3-nemotron-super-49b-v1.5",
# Third-party agentic models hosted on build.nvidia.com
# (map to OpenRouter defaults — users get familiar picks on NIM)
"qwen/qwen3.5-397b-a17b",
"deepseek-ai/deepseek-v3.2",
"moonshotai/kimi-k2.6",
"minimaxai/minimax-m2.5",
"z-ai/glm5",
"openai/gpt-oss-120b",
],
"kimi-coding": [
"kimi-k2.6",
"kimi-k2.5",
"kimi-for-coding",
"kimi-k2-thinking",
"kimi-k2-thinking-turbo",
"kimi-k2-turbo-preview",
"kimi-k2-0905-preview",
],
"kimi-coding-cn": [
"kimi-k2.6",
"kimi-k2.5",
"kimi-k2-thinking",
"kimi-k2-turbo-preview",
"kimi-k2-0905-preview",
],
"stepfun": [
"step-3.5-flash",
"step-3.5-flash-2603",
],
"moonshot": [
"kimi-k2.6",
"kimi-k2.5",
"kimi-k2-thinking",
"kimi-k2-turbo-preview",
"kimi-k2-0905-preview",
],
"minimax": [
"MiniMax-M3",
"MiniMax-M2.7",
"MiniMax-M2.5",
"MiniMax-M2.1",
"MiniMax-M2",
],
"minimax-oauth": [
"MiniMax-M3",
"MiniMax-M2.7",
"MiniMax-M2.7-highspeed",
],
"minimax-cn": [
"MiniMax-M3",
"MiniMax-M2.7",
"MiniMax-M2.5",
"MiniMax-M2.1",
"MiniMax-M2",
],
"anthropic": [
"claude-opus-4-8",
"claude-opus-4-7",
"claude-opus-4-6",
"claude-sonnet-4-6",
"claude-opus-4-5-20251101",
"claude-sonnet-4-5-20250929",
"claude-opus-4-20250514",
"claude-sonnet-4-20250514",
"claude-haiku-4-5-20251001",
],
"deepseek": [
"deepseek-v4-pro",
"deepseek-v4-flash",
"deepseek-chat",
"deepseek-reasoner",
],
"xiaomi": [
"mimo-v2.5-pro",
"mimo-v2.5",
"mimo-v2-pro",
"mimo-v2-omni",
"mimo-v2-flash",
],
"tencent-tokenhub": [
"hy3-preview",
],
"arcee": [
"trinity-large-thinking",
"trinity-large-preview",
"trinity-mini",
],
"gmi": [
"zai-org/GLM-5.1-FP8",
"deepseek-ai/DeepSeek-V3.2",
"moonshotai/Kimi-K2.5",
"google/gemini-3.1-flash-lite-preview",
"anthropic/claude-sonnet-4.6",
"openai/gpt-5.4",
],
"opencode-zen": [
"kimi-k2.5",
"gpt-5.4-pro",
"gpt-5.4",
"gpt-5.3-codex",
"gpt-5.2",
"gpt-5.2-codex",
"gpt-5.1",
"gpt-5.1-codex",
"gpt-5.1-codex-max",
"gpt-5.1-codex-mini",
"gpt-5",
"gpt-5-codex",
"gpt-5-nano",
"claude-opus-4-6",
"claude-opus-4-5",
"claude-opus-4-1",
"claude-sonnet-4-6",
"claude-sonnet-4-5",
"claude-sonnet-4",
"claude-haiku-4-5",
"claude-3-5-haiku",
"gemini-3.1-pro",
"gemini-3-pro",
"gemini-3-flash",
"minimax-m2.7",
"minimax-m2.5",
"minimax-m2.5-free",
"minimax-m2.1",
"glm-5",
"glm-4.7",
"glm-4.6",
"kimi-k2-thinking",
"kimi-k2",
"qwen3-coder",
"big-pickle",
],
"opencode-go": [
"kimi-k2.6",
"kimi-k2.5",
"glm-5.1",
"glm-5",
"mimo-v2.5-pro",
"mimo-v2.5",
"mimo-v2-pro",
"mimo-v2-omni",
"minimax-m2.7",
"minimax-m2.5",
"qwen3.7-max",
"qwen3.6-plus",
"qwen3.5-plus",
],
"kilocode": [
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
"openai/gpt-5.4",
"google/gemini-3-pro-preview",
"google/gemini-3-flash-preview",
],
# Alibaba DashScope Coding platform (coding-intl) — default endpoint.
# Supports Qwen models + third-party providers (GLM, Kimi, MiniMax).
# Users with classic DashScope keys should override DASHSCOPE_BASE_URL
# to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
# or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
"alibaba": [
"qwen3.7-max",
"qwen3.6-plus",
"kimi-k2.5",
"qwen3.5-plus",
"qwen3-coder-plus",
"qwen3-coder-next",
# Third-party models available on coding-intl
"glm-5",
"glm-4.7",
"MiniMax-M2.5",
],
# Alibaba Coding Plan — same platform as alibaba (DashScope coding-intl),
# separate provider ID with its own base_url_env_var.
"alibaba-coding-plan": [
"qwen3.7-max",
"qwen3.6-plus",
"qwen3.5-plus",
"qwen3-coder-plus",
"qwen3-coder-next",
"kimi-k2.5",
"glm-5",
"glm-4.7",
"MiniMax-M2.5",
],
# Curated HF model list — only agentic models that map to OpenRouter defaults.
"huggingface": [
"moonshotai/Kimi-K2.5",
"Qwen/Qwen3.5-397B-A17B",
"Qwen/Qwen3.5-35B-A3B",
"deepseek-ai/DeepSeek-V3.2",
"MiniMaxAI/MiniMax-M2.5",
"zai-org/GLM-5",
"XiaomiMiMo/MiMo-V2-Flash",
"moonshotai/Kimi-K2-Thinking",
"moonshotai/Kimi-K2.6",
],
# AWS Bedrock — static fallback list used when dynamic discovery is
# unavailable (no boto3, no credentials, or API error). The agent
# prefers live discovery via ListFoundationModels + ListInferenceProfiles.
# Use inference profile IDs (us.*) since most models require them.
"bedrock": [
"us.anthropic.claude-sonnet-4-6",
"us.anthropic.claude-opus-4-6-v1",
"us.anthropic.claude-haiku-4-5-20251001-v1:0",
"us.anthropic.claude-sonnet-4-5-20250929-v1:0",
"us.amazon.nova-pro-v1:0",
"us.amazon.nova-lite-v1:0",
"us.amazon.nova-micro-v1:0",
"deepseek.v3.2",
"us.meta.llama4-maverick-17b-instruct-v1:0",
"us.meta.llama4-scout-17b-instruct-v1:0",
],
# Azure Foundry: user-provided endpoint and model.
# Empty list because models depend on the endpoint configuration.
"azure-foundry": [],
"novita": [
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.7",
"zai-org/glm-5",
"deepseek/deepseek-v3-0324",
"deepseek/deepseek-r1-0528",
"qwen/qwen3-235b-a22b-fp8",
],
}
# ---------------------------------------------------------------------------
# Nous Portal free-model helper
# ---------------------------------------------------------------------------
# The Nous Portal models endpoint is the source of truth for which models
# are currently offered (free or paid). We trust whatever it returns and
# surface it to users as-is — no local allowlist filtering.
def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
"""Return True if *model_id* has zero-cost prompt AND completion pricing."""
p = pricing.get(model_id)
if not p:
return False
try:
return float(p.get("prompt", "1")) == 0 and float(p.get("completion", "1")) == 0
except (TypeError, ValueError):
return False
# ---------------------------------------------------------------------------
# Nous Portal account tier detection
# ---------------------------------------------------------------------------
def is_nous_free_tier(account_info: dict[str, Any]) -> bool:
"""Return True if the account info indicates a free (unpaid) tier.
Prefer the Portal's explicit ``paid_service_access.allowed`` entitlement
decision. Legacy payloads fall back to ``subscription.monthly_charge == 0``.
Returns False when both signals are missing or unparseable.
"""
paid_access = account_info.get("paid_service_access")
if isinstance(paid_access, dict):
allowed = paid_access.get("allowed")
if isinstance(allowed, bool):
return not allowed
paid = paid_access.get("paid_access")
if isinstance(paid, bool):
return not paid
sub = account_info.get("subscription")
if not isinstance(sub, dict):
return False
charge = sub.get("monthly_charge")
if charge is None:
return False
try:
return float(charge) == 0
except (TypeError, ValueError):
return False
def partition_nous_models_by_tier(
model_ids: list[str],
pricing: dict[str, dict[str, str]],
free_tier: bool,
) -> tuple[list[str], list[str]]:
"""Split Nous models into (selectable, unavailable) based on user tier.
For paid-tier users: all models are selectable, none unavailable.
For free-tier users: only free models are selectable; paid models
are returned as unavailable (shown grayed out in the menu).
"""
if not free_tier:
return (model_ids, [])
if not pricing:
return (model_ids, []) # can't determine, show everything
selectable: list[str] = []
unavailable: list[str] = []
for mid in model_ids:
if _is_model_free(mid, pricing):
selectable.append(mid)
else:
unavailable.append(mid)
return (selectable, unavailable)
def union_with_portal_free_recommendations(
curated_ids: list[str],
pricing: dict[str, dict[str, str]],
portal_base_url: str = "",
*,
force_refresh: bool = False,
) -> tuple[list[str], dict[str, dict[str, str]]]:
"""Augment curated list + pricing with the Portal's ``freeRecommendedModels``.
The Portal's ``/api/nous/recommended-models`` endpoint advertises which
models are free *right now* — independent of what the in-repo
``_PROVIDER_MODELS["nous"]`` list happens to contain or whether the
docs-hosted catalog manifest has been rebuilt since the last release.
For free-tier users this is the source of truth: any model the Portal
flags as free should be selectable, even if the user is running an
older Hermes that doesn't ship that model in its hardcoded curated
list. This function returns an augmented ``(model_ids, pricing)``
pair where:
* Portal free recommendations missing from ``curated_ids`` are
appended after the curated list (so the in-repo curated models
show first and Portal-only picks follow).
* ``pricing`` gets a synthetic ``{"prompt": "0", "completion": "0"}``
entry for any free recommendation missing from the live pricing
map, so :func:`partition_nous_models_by_tier` keeps it.
Failures (network, parse, missing field) are silent and degrade to
returning the inputs unchanged.
"""
try:
payload = fetch_nous_recommended_models(
portal_base_url, force_refresh=force_refresh
)
except Exception:
return (list(curated_ids), dict(pricing))
free_block = payload.get("freeRecommendedModels") if isinstance(payload, dict) else None
if not isinstance(free_block, list) or not free_block:
return (list(curated_ids), dict(pricing))
portal_free_ids: list[str] = []
for entry in free_block:
name = _extract_model_name(entry)
if name:
portal_free_ids.append(name)
if not portal_free_ids:
return (list(curated_ids), dict(pricing))
augmented_pricing = dict(pricing)
free_synthetic = {"prompt": "0", "completion": "0"}
for mid in portal_free_ids:
if mid not in augmented_pricing:
augmented_pricing[mid] = dict(free_synthetic)
augmented_ids = list(curated_ids)
seen = set(augmented_ids)
# Append Portal free recommendations that aren't already curated, so the
# in-repo curated ("HA") models show first and Portal-only picks follow.
new_ones = [mid for mid in portal_free_ids if mid not in seen]
if new_ones:
augmented_ids = augmented_ids + new_ones
return (augmented_ids, augmented_pricing)
def union_with_portal_paid_recommendations(
curated_ids: list[str],
pricing: dict[str, dict[str, str]],
portal_base_url: str = "",
*,
force_refresh: bool = False,
) -> tuple[list[str], dict[str, dict[str, str]]]:
"""Augment curated list with the Portal's ``paidRecommendedModels``.
Mirror of :func:`union_with_portal_free_recommendations` for paid-tier
users. The Portal's ``/api/nous/recommended-models`` endpoint advertises
which paid models are blessed *right now* — independent of what the
in-repo ``_PROVIDER_MODELS["nous"]`` list happens to contain or whether
the docs-hosted catalog manifest has been rebuilt since the last release.
For paid-tier users this lets newly-launched paid models surface in the
picker even if the user is running an older Hermes that doesn't ship
them in its hardcoded curated list. This function returns an augmented
``(model_ids, pricing)`` pair where:
* Portal paid recommendations missing from ``curated_ids`` are
appended after the curated list (so the in-repo curated models
show first and Portal-only picks follow).
* ``pricing`` is left untouched — we deliberately do NOT synthesize
pricing entries for paid models. Live pricing is fetched separately
via :func:`get_pricing_for_provider`; if the live endpoint hasn't
published pricing yet, the picker shows a blank price column rather
than fabricating numbers. (The free helper synthesizes ``$0`` so
:func:`partition_nous_models_by_tier` keeps free models selectable;
no equivalent gating applies on the paid side, so synthesis would
only mislead the user.)
Failures (network, parse, missing field) are silent and degrade to
returning the inputs unchanged — never block the picker on a
Portal-side hiccup.
"""
try:
payload = fetch_nous_recommended_models(
portal_base_url, force_refresh=force_refresh
)
except Exception:
return (list(curated_ids), dict(pricing))
paid_block = payload.get("paidRecommendedModels") if isinstance(payload, dict) else None
if not isinstance(paid_block, list) or not paid_block:
return (list(curated_ids), dict(pricing))
portal_paid_ids: list[str] = []
for entry in paid_block:
name = _extract_model_name(entry)
if name:
portal_paid_ids.append(name)
if not portal_paid_ids:
return (list(curated_ids), dict(pricing))
augmented_ids = list(curated_ids)
seen = set(augmented_ids)
# Append Portal paid recommendations that aren't already curated, so the
# in-repo curated ("HA") models show first and Portal-only picks follow.
new_ones = [mid for mid in portal_paid_ids if mid not in seen]
if new_ones:
augmented_ids = augmented_ids + new_ones
return (augmented_ids, dict(pricing))
# ---------------------------------------------------------------------------
# TTL cache for free-tier detection — avoids repeated API calls within a
# session while still picking up upgrades quickly.
# ---------------------------------------------------------------------------
_FREE_TIER_CACHE_TTL: int = 180 # seconds (3 minutes)
_free_tier_cache: tuple[bool, float] | None = None # (result, timestamp)
def check_nous_free_tier(*, force_fresh: bool = False) -> bool:
"""Check if the current Nous Portal user is on a free (unpaid) tier.
Results are cached for ``_FREE_TIER_CACHE_TTL`` seconds to avoid
hitting the Portal API on every call. The cache is short-lived so
that an account upgrade is reflected within a few minutes.
Returns True only when entitlement is known to be free. Unknown/error
states return False so this compatibility wrapper does not block users.
"""
global _free_tier_cache
now = time.monotonic()
if not force_fresh and _free_tier_cache is not None:
cached_result, cached_at = _free_tier_cache
if now - cached_at < _FREE_TIER_CACHE_TTL:
return cached_result
try:
from hermes_cli.nous_account import get_nous_portal_account_info
account_info = get_nous_portal_account_info(force_fresh=force_fresh)
result = account_info.is_free_tier
_free_tier_cache = (result, now)
return result
except Exception:
_free_tier_cache = (False, now)
return False # default to paid on error — don't block users
# ---------------------------------------------------------------------------
# Nous Portal recommended models
#
# The Portal publishes a curated list of suggested models (separated into
# paid and free tiers) plus dedicated recommendations for compaction (text
# summarisation / auxiliary) and vision tasks. We fetch it once per process
# with a TTL cache so callers can ask "what's the best aux model right now?"
# without hitting the network on every lookup.
#
# Shape of the response (fields we care about):
# {
# "paidRecommendedModels": [ {modelName, ...}, ... ],
# "freeRecommendedModels": [ {modelName, ...}, ... ],
# "paidRecommendedCompactionModel": {modelName, ...} | null,
# "paidRecommendedVisionModel": {modelName, ...} | null,
# "freeRecommendedCompactionModel": {modelName, ...} | null,
# "freeRecommendedVisionModel": {modelName, ...} | null,
# }
# ---------------------------------------------------------------------------
NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models"
_NOUS_RECOMMENDED_CACHE_TTL: int = 600 # seconds (10 minutes)
# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide.
_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}
def _nous_recommended_disk_path() -> "Path":
"""Disk path for the persisted recommended-models cache."""
from hermes_constants import get_hermes_home
return get_hermes_home() / "cache" / "nous_recommended_cache.json"
def _read_nous_recommended_disk(base: str) -> dict[str, Any] | None:
"""Return the last-known-good payload for ``base`` from disk, or None.
The disk file is a JSON object keyed by portal base URL so staging and
prod don't collide:
``{"": {"data": {...}, "ts": }}``.
"""
try:
with open(_nous_recommended_disk_path(), encoding="utf-8") as fh:
blob = json.load(fh)
except (OSError, json.JSONDecodeError):
return None
if not isinstance(blob, dict):
return None
entry = blob.get(base)
if not isinstance(entry, dict):
return None
data = entry.get("data")
return data if isinstance(data, dict) and data else None
def _write_nous_recommended_disk(base: str, data: dict[str, Any]) -> None:
"""Persist ``data`` as the last-known-good payload for ``base``.
Merges into any existing per-base map, then writes atomically. Failures
are non-fatal (logged at debug) — the in-process cache still works.
"""
if not data:
return
path = _nous_recommended_disk_path()
try:
try:
with open(path, encoding="utf-8") as fh:
blob = json.load(fh)
if not isinstance(blob, dict):
blob = {}
except (OSError, json.JSONDecodeError):
blob = {}
blob[base] = {"data": data, "ts": time.time()}
path.parent.mkdir(parents=True, exist_ok=True)
tmp = path.with_suffix(path.suffix + ".tmp")
with open(tmp, "w", encoding="utf-8") as fh:
json.dump(blob, fh, indent=2)
fh.write("\n")
os.replace(tmp, path)
except OSError as exc:
import logging
logging.getLogger(__name__).debug(
"nous recommended-models disk cache write failed: %s", exc
)
def fetch_nous_recommended_models(
portal_base_url: str = "",
timeout: float = 5.0,
*,
force_refresh: bool = False,
) -> dict[str, Any]:
"""Fetch the Nous Portal's curated recommended-models payload.
Hits ``/api/nous/recommended-models``. The endpoint is public —
no auth is required. Results are cached per portal URL for
``_NOUS_RECOMMENDED_CACHE_TTL`` seconds in process; pass
``force_refresh=True`` to bypass the in-process cache.
A successful live fetch is also persisted to a per-base disk cache
(``$HERMES_HOME/cache/nous_recommended_cache.json``) as last-known-good.
When the live fetch fails (network, parse, non-2xx) and the in-process
cache is empty, the disk copy is returned instead of ``{}`` — so a
transient Portal hiccup no longer silently drops the free/paid model
recommendations from the picker. Self-heals on the next successful fetch.
Returns the parsed JSON dict, or ``{}`` only when neither the network nor
any cache layer can supply data. Callers must treat missing/null fields
as "no recommendation" and fall back to their own default.
"""
base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
now = time.monotonic()
cached = _nous_recommended_cache.get(base)
if not force_refresh and cached is not None:
payload, cached_at = cached
if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL:
return payload
url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}"
try:
req = urllib.request.Request(
url,
headers={"Accept": "application/json"},
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
data = json.loads(resp.read().decode())
if not isinstance(data, dict):
data = {}
except Exception:
data = {}
if data:
# Live fetch succeeded — refresh both cache layers.
_nous_recommended_cache[base] = (data, now)
_write_nous_recommended_disk(base, data)
return data
# Live fetch failed. Fall back to the last-known-good disk copy so a
# transient Portal hiccup doesn't drop the recommendations entirely.
disk = _read_nous_recommended_disk(base)
if disk:
_nous_recommended_cache[base] = (disk, now)
return disk
_nous_recommended_cache[base] = (data, now)
return data
def _resolve_nous_portal_url() -> str:
"""Best-effort lookup of the Portal base URL the user is authed against."""
try:
from hermes_cli.auth import (
DEFAULT_NOUS_PORTAL_URL,
get_provider_auth_state,
)
state = get_provider_auth_state("nous") or {}
portal = str(state.get("portal_base_url") or "").strip()
if portal:
return portal.rstrip("/")
return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/")
except Exception:
return "https://portal.nousresearch.com"
def _extract_model_name(entry: Any) -> Optional[str]:
"""Pull the ``modelName`` field from a recommended-model entry, else None."""
if not isinstance(entry, dict):
return None
model_name = entry.get("modelName")
if isinstance(model_name, str) and model_name.strip():
return model_name.strip()
return None
def get_nous_recommended_aux_model(
*,
vision: bool = False,
free_tier: Optional[bool] = None,
portal_base_url: str = "",
force_refresh: bool = False,
) -> Optional[str]:
"""Return the Portal's recommended model name for an auxiliary task.
Picks the best field from the Portal's recommended-models payload:
* ``vision=True`` → ``paidRecommendedVisionModel`` (paid tier) or
``freeRecommendedVisionModel`` (free tier)
* ``vision=False`` → ``paidRecommendedCompactionModel`` or
``freeRecommendedCompactionModel``
When ``free_tier`` is ``None`` (default) the user's tier is auto-detected
via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the
detection — useful for tests or when the caller already knows the tier.
For paid-tier users we prefer the paid recommendation but gracefully fall
back to the free recommendation if the Portal returned ``null`` for the
paid field (common during the staged rollout of new paid models).
Returns ``None`` when every candidate is missing, null, or the fetch
fails — callers should fall back to their own default (currently
``google/gemini-3-flash-preview``).
"""
base = portal_base_url or _resolve_nous_portal_url()
payload = fetch_nous_recommended_models(base, force_refresh=force_refresh)
if not payload:
return None
if free_tier is None:
try:
free_tier = check_nous_free_tier()
except Exception:
# On any detection error, assume paid — paid users see both fields
# anyway so this is a safe default that maximises model quality.
free_tier = False
if vision:
paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel"
else:
paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel"
# Preference order:
# free tier → free only
# paid tier → paid, then free (if paid field is null)
candidates = [free_key] if free_tier else [paid_key, free_key]
for key in candidates:
name = _extract_model_name(payload.get(key))
if name:
return name
return None
# ---------------------------------------------------------------------------
# Canonical provider list — single source of truth for provider identity.
# Every code path that lists, displays, or iterates providers derives from
# this list: hermes model, /model, list_authenticated_providers.
#
# Fields:
# slug — internal provider ID (used in config.yaml, --provider flag)
# label — short display name
# tui_desc — longer description for the `hermes model` interactive picker
# ---------------------------------------------------------------------------
class ProviderEntry(NamedTuple):
slug: str
label: str
tui_desc: str # detailed description for `hermes model` TUI
CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("nous", "Nous Portal", "Nous Portal (Everything your agent needs, 300+ models with bundled tool use)"),
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (Pay-per-use API aggregator)"),
ProviderEntry("novita", "NovitaAI", "NovitaAI (Cloud: Model API, Agent Sandbox, GPU Cloud)"),
ProviderEntry("lmstudio", "LM Studio", "LM Studio (Local desktop app with built-in model server)"),
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models via API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex (Codex CLI via ChatGPT subscription or API key)"),
ProviderEntry("openai-api", "OpenAI API", "OpenAI API (api.openai.com, API key)"),
ProviderEntry("alibaba", "Qwen Cloud", "Qwen Cloud / DashScope (Qwen + multi-provider)"),
ProviderEntry("xai-oauth", "xAI Grok OAuth (SuperGrok / Premium+)", "xAI Grok OAuth (SuperGrok / Premium+ subscription)"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models: pro, omni, flash)"),
ProviderEntry("tencent-tokenhub", "Tencent TokenHub", "Tencent TokenHub (Hy3 Preview via tokenhub.tencentmaas.com)"),
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models via build.nvidia.com or local NIM)"),
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (Uses GITHUB_TOKEN or gh auth token)"),
ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (Spawns copilot --acp --stdio)"),
ProviderEntry("huggingface", "Hugging Face", "Hugging Face Inference Providers"),
ProviderEntry("gemini", "Google AI Studio", "Google AI Studio (Native Gemini API)"),
ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (Code Assist OAuth flow)"),
ProviderEntry("deepseek", "DeepSeek", "DeepSeek (V3, R1, coder, direct API)"),
ProviderEntry("xai", "xAI", "xAI Grok (Direct API)"),
ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu direct API)"),
ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com & Moonshot API)"),
ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Domestic direct API)"),
ProviderEntry("stepfun", "StepFun Step Plan", "StepFun Step Plan (Agent / coding models via Step Plan API)"),
ProviderEntry("minimax", "MiniMax", "MiniMax (Global direct API)"),
ProviderEntry("minimax-oauth", "MiniMax (OAuth)", "MiniMax via OAuth browser login (Coding Plan, minimax.io)"),
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (Domestic direct API)"),
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (Cloud-hosted open models, ollama.com)"),
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models, direct API)"),
ProviderEntry("gmi", "GMI Cloud", "GMI Cloud (Multi-model direct API)"),
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (Curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (Open models subscription)"),
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek; IAM or API key)"),
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint, your Azure AI deployment)"),
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (Reuses local Qwen CLI login)"),
]
# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
# that is not already in the list above. Adding plugins/model-providers//
# is sufficient to expose a new provider in the model picker, /model, and all
# downstream consumers — no edits to this file needed.
_canonical_slugs = {p.slug for p in CANONICAL_PROVIDERS}
try:
from providers import list_providers as _list_providers_for_canonical
for _pp in _list_providers_for_canonical():
if _pp.name in _canonical_slugs:
continue
if _pp.auth_type in {"oauth_device_code", "oauth_external", "external_process", "aws_sdk", "copilot"}:
continue # non-api-key flows need bespoke picker UX; skip auto-inject
_label = _pp.display_name or _pp.name
_desc = _pp.description or f"{_label} (direct API)"
CANONICAL_PROVIDERS.append(ProviderEntry(_pp.name, _label, _desc))
_canonical_slugs.add(_pp.name)
except Exception:
pass
# Derived dicts — used throughout the codebase
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
# ---------------------------------------------------------------------------
# Provider groups — DISPLAY ONLY
#
# Some vendors expose several Hermes provider slugs (one per endpoint /
# auth method: global API, China API, OAuth coding plan, ...). Listing every
# slug as a top-level row in the interactive `hermes model` / setup wizard /
# Telegram `/model` pickers makes that list long and noisy.
#
# These groups fold related slugs under one top-level row in INTERACTIVE
# PICKERS only. They do NOT change ``CANONICAL_PROVIDERS``, slug identity,
# the ``--provider`` flag, ``/model ``, or any typed path —
# every member slug remains individually addressable. Grouping is a pure
# display affordance; ``group_providers()`` is the single fold used by all
# three picker surfaces so they stay consistent.
#
# group_id -> (display_label, group_description, [member_slug, ...])
#
# ``group_description`` is a short blurb shown on the collapsed top-level group
# row in the interactive pickers (alongside the label). Member-specific detail
# lives in each member's ``tui_desc`` and shows in the drill-down sub-picker.
# Member order is the order shown inside the group submenu.
# ---------------------------------------------------------------------------
PROVIDER_GROUPS: dict[str, tuple[str, str, list[str]]] = {
"kimi": ("Kimi / Moonshot", "Coding Plan, Moonshot global & China endpoints", ["kimi-coding", "kimi-coding-cn"]),
"minimax": ("MiniMax", "Global, OAuth Coding Plan & China endpoints", ["minimax", "minimax-oauth", "minimax-cn"]),
"xai": ("xAI Grok", "Direct API or SuperGrok / Premium+ OAuth", ["xai", "xai-oauth"]),
"google": ("Google Gemini", "AI Studio API or OAuth + Code Assist", ["gemini", "google-gemini-cli"]),
"openai": ("OpenAI", "Codex CLI or direct OpenAI API", ["openai-codex", "openai-api"]),
"opencode": ("OpenCode", "Zen pay-as-you-go or Go subscription", ["opencode-zen", "opencode-go"]),
"copilot": ("GitHub Copilot", "GitHub token API or copilot --acp process", ["copilot", "copilot-acp"]),
}
# Reverse index: member slug -> group_id. Built once at import.
_SLUG_TO_GROUP: dict[str, str] = {
slug: gid for gid, (_label, _desc, members) in PROVIDER_GROUPS.items() for slug in members
}
def provider_group_for_slug(slug: str) -> str:
"""Return the group_id a provider slug belongs to, or "" if ungrouped."""
return _SLUG_TO_GROUP.get(str(slug or "").strip().lower(), "")
def group_providers(slugs):
"""Fold a flat ordered slug iterable into picker rows by provider group.
DISPLAY ONLY. Used by every interactive picker (``hermes model``, the
setup wizard, the Telegram ``/model`` keyboard) so grouping is identical
across surfaces.
Each returned row is a dict::
{"kind": "single", "slug": } # ungrouped, or
# 1-member group
{"kind": "group", "group_id": , "label":