fix(nous): surface Portal-flagged free models in picker even when curated list is stale (#24082)

Free-tier users were seeing 'No free models currently available.' in the
`hermes model` and post-login pickers even though qwen/qwen3.6-plus is
free on the Portal right now. Three independent breakages compounded:

1. The docs-hosted catalog manifest at website/static/api/model-catalog.json
   was not regenerated when _PROVIDER_MODELS['nous'] was updated, so users
   fetching the manifest got a list that didn't include qwen/qwen3.6-plus.
2. _resolve_nous_pricing_credentials() returned ('', '') on any auth blip,
   collapsing get_pricing_for_provider('nous') to {} and making every
   curated model fall through the free-tier filter as 'paid'.
3. Even with healthy pricing, the picker only ever showed models from the
   in-repo curated list intersected with live pricing — a Portal-flagged
   free model not yet in the curated list could never appear.

Changes:
- hermes_cli/models.py: new union_with_portal_free_recommendations() that
  augments the curated list with Portal freeRecommendedModels entries
  (with synthetic free pricing so partition keeps them). The Portal's
  /api/nous/recommended-models endpoint is now the source of truth for
  free-tier surfacing — old Hermes builds will see new free models
  without a CLI release.
- hermes_cli/models.py: _resolve_nous_pricing_credentials() falls back to
  the public inference base URL when runtime cred resolution fails.
  The /v1/models endpoint exposes pricing without auth, so silently
  returning {} just because a refresh token expired was wrong.
- hermes_cli/auth.py + hermes_cli/main.py: both free-tier picker call
  sites call union_with_portal_free_recommendations() before partition.
- tests/hermes_cli/test_models.py: 7 tests covering union behaviour
  (prepend, dedup, end-to-end with stale pricing, empty/missing/error
  payloads, invalid entries).
- tests/hermes_cli/test_model_catalog.py: drift guard
  TestManifestMatchesInRepoLists fails CI when _PROVIDER_MODELS['nous']
  or OPENROUTER_MODELS is edited without re-running
  scripts/build_model_catalog.py. Verified empirically that removing a
  manifest entry triggers an assertion with an actionable error message.

Validation:
- 133/133 targeted tests pass (test_models, test_model_catalog,
  test_auth_nous_provider).
- Live E2E against the real Portal:
  - Stale curated list ['claude-opus','claude-sonnet','gpt-5.4'] (no
    qwen) → after union: ['qwen/qwen3.6-plus', ...] →
    partition(free_tier=True): selectable=['qwen/qwen3.6-plus'].
  - Simulated expired refresh token → anonymous fetch still returns 403
    pricing entries (an entry count, not an HTTP status), including
    qwen/qwen3.6-plus → {prompt:0, completion:0}.
- ruff: clean.
This commit is contained in:
Teknium 2026-05-11 18:08:16 -07:00 committed by GitHub
parent ced1990c1c
commit e85592591e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 289 additions and 15 deletions

View file

@ -5251,6 +5251,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
from hermes_cli.models import (
get_curated_nous_model_ids, get_pricing_for_provider,
check_nous_free_tier, partition_nous_models_by_tier,
union_with_portal_free_recommendations,
)
model_ids = get_curated_nous_model_ids()
@ -5260,6 +5261,15 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
pricing = get_pricing_for_provider("nous")
free_tier = check_nous_free_tier()
if free_tier:
# The Portal's freeRecommendedModels endpoint is the
# source of truth for what's free *right now*. Augment
# the curated list with anything new the Portal flags
# as free so users on older Hermes builds still see
# newly-launched free models without a CLI release.
_portal_for_recs = auth_state.get("portal_base_url", "")
model_ids, pricing = union_with_portal_free_recommendations(
model_ids, pricing, _portal_for_recs,
)
model_ids, unavailable_models = partition_nous_models_by_tier(
model_ids, pricing, free_tier=True,
)

View file

@ -2589,6 +2589,7 @@ def _model_flow_nous(config, current_model="", args=None):
get_pricing_for_provider,
check_nous_free_tier,
partition_nous_models_by_tier,
union_with_portal_free_recommendations,
)
model_ids = get_curated_nous_model_ids()
@ -2629,19 +2630,8 @@ def _model_flow_nous(config, current_model="", args=None):
# Check if user is on free tier
free_tier = check_nous_free_tier()
# For free users: partition models into selectable/unavailable based on
# whether they are free per the Portal-reported pricing.
unavailable_models: list[str] = []
if free_tier:
model_ids, unavailable_models = partition_nous_models_by_tier(
model_ids, pricing, free_tier=True
)
if not model_ids and not unavailable_models:
print("No models available for Nous Portal after filtering.")
return
# Resolve portal URL for upgrade links (may differ on staging)
# Resolve portal URL early — needed both for upgrade links and for the
# freeRecommendedModels endpoint below.
_nous_portal_url = ""
try:
_nous_state = get_provider_auth_state("nous")
@ -2650,6 +2640,24 @@ def _model_flow_nous(config, current_model="", args=None):
except Exception:
pass
# For free users: partition models into selectable/unavailable based on
# whether they are free per the Portal-reported pricing. First augment
# with the Portal's freeRecommendedModels list so newly-launched free
# models show up even if this CLI build's hardcoded curated list and
# docs-hosted manifest haven't caught up yet.
unavailable_models: list[str] = []
if free_tier:
model_ids, pricing = union_with_portal_free_recommendations(
model_ids, pricing, _nous_portal_url,
)
model_ids, unavailable_models = partition_nous_models_by_tier(
model_ids, pricing, free_tier=True
)
if not model_ids and not unavailable_models:
print("No models available for Nous Portal after filtering.")
return
if free_tier and not model_ids:
print("No free models currently available.")
if unavailable_models:

View file

@ -556,6 +556,71 @@ def partition_nous_models_by_tier(
return (selectable, unavailable)
def union_with_portal_free_recommendations(
    curated_ids: list[str],
    pricing: dict[str, dict[str, str]],
    portal_base_url: str = "",
    *,
    force_refresh: bool = False,
) -> tuple[list[str], dict[str, dict[str, str]]]:
    """Augment curated list + pricing with the Portal's ``freeRecommendedModels``.

    The Portal's ``/api/nous/recommended-models`` endpoint advertises which
    models are free *right now* independent of what the in-repo
    ``_PROVIDER_MODELS["nous"]`` list happens to contain or whether the
    docs-hosted catalog manifest has been rebuilt since the last release.

    For free-tier users this is the source of truth: any model the Portal
    flags as free should be selectable, even if the user is running an
    older Hermes that doesn't ship that model in its hardcoded curated
    list.

    Args:
        curated_ids: Model ids from the curated list, in display order.
        pricing: Live pricing map keyed by model id.
        portal_base_url: Forwarded to ``fetch_nous_recommended_models``.
        force_refresh: Forwarded to ``fetch_nous_recommended_models``.

    Returns:
        A new ``(model_ids, pricing)`` pair; the inputs are never mutated.

        * Portal free recommendations missing from ``curated_ids`` are
          prepended, in Portal order and without repeats, so the picker
          shows them first.
        * ``pricing`` gains a synthetic ``{"prompt": "0", "completion": "0"}``
          entry for any free recommendation missing from the live pricing
          map, so :func:`partition_nous_models_by_tier` keeps it. Entries
          already present in ``pricing`` are left untouched.

    A failing fetch, a payload that is not a dict, or a missing / empty /
    non-list ``freeRecommendedModels`` field is silent and degrades to
    returning copies of the inputs unchanged.
    """
    try:
        payload = fetch_nous_recommended_models(
            portal_base_url, force_refresh=force_refresh
        )
    except Exception:
        # Deliberate best-effort: the picker must keep working when the
        # Portal is unreachable or returns garbage.
        return (list(curated_ids), dict(pricing))

    free_block = payload.get("freeRecommendedModels") if isinstance(payload, dict) else None
    if not isinstance(free_block, list) or not free_block:
        return (list(curated_ids), dict(pricing))

    # Keep Portal order, drop entries without a usable name, and collapse
    # repeats so a model listed twice by the Portal cannot be prepended
    # twice (which would show a duplicate row in the picker).
    portal_free_ids: list[str] = list(
        dict.fromkeys(
            name for name in map(_extract_model_name, free_block) if name
        )
    )
    if not portal_free_ids:
        return (list(curated_ids), dict(pricing))

    augmented_pricing = dict(pricing)
    for mid in portal_free_ids:
        # Only fill gaps; each synthetic entry is its own dict so callers
        # mutating one model's pricing cannot affect another's.
        augmented_pricing.setdefault(mid, {"prompt": "0", "completion": "0"})

    curated = list(curated_ids)
    already_listed = set(curated)
    # Prepend Portal free recommendations that aren't already curated, so
    # they appear first in the picker.
    new_ones = [mid for mid in portal_free_ids if mid not in already_listed]
    return (new_ones + curated, augmented_pricing)
# ---------------------------------------------------------------------------
# TTL cache for free-tier detection — avoids repeated API calls within a
# session while still picking up upgrades quickly.
@ -1338,8 +1403,21 @@ def _resolve_openrouter_api_key() -> str:
return os.getenv("OPENROUTER_API_KEY", "").strip()
# Public Nous inference API base URL. _resolve_nous_pricing_credentials()
# falls back to it (with an empty api_key) when runtime credentials cannot
# be resolved, so pricing can still be looked up anonymously.
_DEFAULT_NOUS_INFERENCE_BASE = "https://inference-api.nousresearch.com"
def _resolve_nous_pricing_credentials() -> tuple[str, str]:
"""Return ``(api_key, base_url)`` for Nous Portal pricing, or empty strings."""
"""Return ``(api_key, base_url)`` for Nous Portal pricing.
The Nous inference ``/v1/models`` endpoint exposes pricing without
authentication, so the api_key is best-effort: when runtime credential
resolution fails (expired refresh token, missing auth.json, etc.) we
still return the default inference base URL so the picker keeps
working with anonymous pricing data. Free-tier users in particular
need this: pricing drives the free/paid partition, and silently
returning empty pricing because of an auth blip makes the picker
look broken ("No free models currently available").
"""
try:
from hermes_cli.auth import resolve_nous_runtime_credentials
creds = resolve_nous_runtime_credentials()
@ -1347,7 +1425,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:
return (creds.get("api_key", ""), creds.get("base_url", ""))
except Exception:
pass
return ("", "")
return ("", _DEFAULT_NOUS_INFERENCE_BASE)
def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:

View file

@ -328,3 +328,58 @@ class TestIntegrationWithModelsModule:
"anthropic/claude-opus-4.7",
"moonshotai/kimi-k2.6",
]
# -----------------------------------------------------------------------------
# Drift guard — prevent the in-repo curated lists from going out of sync with
# the docs-hosted manifest at website/static/api/model-catalog.json.
#
# History: qwen/qwen3.6-plus was added to _PROVIDER_MODELS["nous"] in commit
# 9dd6e5510 but website/static/api/model-catalog.json was not regenerated for
# weeks, so free-tier users on a new install fetched a stale manifest and the
# free-tier picker showed "No free models currently available." even though
# the Portal was serving qwen/qwen3.6-plus as free. CI must catch this.
# -----------------------------------------------------------------------------
class TestManifestMatchesInRepoLists:
    """Fail if the on-disk manifest is out of date relative to in-repo lists."""

    @staticmethod
    def _strip_volatile(catalog: dict) -> dict:
        """Return a shallow copy of *catalog* without ``updated_at``.

        The timestamp changes on every rebuild, so it is dropped before
        comparing the committed manifest with a freshly built one.
        """
        out = dict(catalog)
        out.pop("updated_at", None)
        return out

    def test_in_repo_lists_match_manifest(self):
        """``scripts/build_model_catalog.py`` output must match the committed file.

        If this fails, run ``python scripts/build_model_catalog.py`` and
        commit the regenerated ``website/static/api/model-catalog.json``.
        """
        # Resolve the repo root from this test file's location.
        repo_root = Path(__file__).resolve().parents[2]
        manifest_path = repo_root / "website" / "static" / "api" / "model-catalog.json"
        if not manifest_path.exists():
            pytest.skip(f"manifest missing at {manifest_path}")

        # Build expected catalog using the same script CI would.
        import importlib.util

        script_path = repo_root / "scripts" / "build_model_catalog.py"
        spec = importlib.util.spec_from_file_location("_build_model_catalog", script_path)
        # spec_from_file_location may return None; validate the spec and
        # its loader *before* they are used so a failure is actionable.
        assert spec is not None and spec.loader is not None, (
            f"could not create an import spec for {script_path}"
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        expected = mod.build_catalog()

        with open(manifest_path, encoding="utf-8") as fh:
            actual = json.load(fh)

        assert self._strip_volatile(actual) == self._strip_volatile(expected), (
            "website/static/api/model-catalog.json is out of sync with "
            "_PROVIDER_MODELS['nous'] / OPENROUTER_MODELS. "
            "Run: python scripts/build_model_catalog.py && "
            "git add website/static/api/model-catalog.json"
        )

View file

@ -6,6 +6,7 @@ from hermes_cli.models import (
OPENROUTER_MODELS, fetch_openrouter_models, model_ids, detect_provider_for_model,
is_nous_free_tier, partition_nous_models_by_tier,
check_nous_free_tier, _FREE_TIER_CACHE_TTL,
union_with_portal_free_recommendations,
)
import hermes_cli.models as _models_mod
@ -383,6 +384,128 @@ class TestPartitionNousModelsByTier:
assert unav == models
class TestUnionWithPortalFreeRecommendations:
    """Exercise ``union_with_portal_free_recommendations``.

    The in-repo curated list and the docs-hosted manifest can lag behind
    the Portal's freeRecommendedModels endpoint, which is the source of
    truth for what's free *right now*. These tests pin that the helper
    still surfaces Portal-flagged free models when the rest of the catalog
    is stale, and hands back its inputs when the Portal response is
    unusable.
    """

    _PAID = {"prompt": "0.000003", "completion": "0.000015"}
    _FREE = {"prompt": "0", "completion": "0"}

    def _payload(self, free_models: list[str]) -> dict:
        """Build a Portal-shaped response advertising *free_models* as free."""
        entries = [{"modelName": mid, "displayName": mid} for mid in free_models]
        return {"freeRecommendedModels": entries}

    def _run_union(self, curated, pricing, **fetch_kwargs):
        """Call the helper with the Portal fetch patched.

        ``fetch_kwargs`` is handed straight to ``patch`` (``return_value=``
        or ``side_effect=``) to script what the fetch does.
        """
        with patch(
            "hermes_cli.models.fetch_nous_recommended_models", **fetch_kwargs
        ):
            return union_with_portal_free_recommendations(curated, pricing, "")

    def test_adds_portal_free_model_missing_from_curated(self):
        """A Portal-advertised free model not in curated is prepended + priced free."""
        curated = ["anthropic/claude-opus-4.6"]
        pricing = {"anthropic/claude-opus-4.6": self._PAID}

        ids, p = self._run_union(
            curated, pricing, return_value=self._payload(["qwen/qwen3.6-plus"])
        )

        # New model goes to the front; the curated one is still listed.
        assert ids[0] == "qwen/qwen3.6-plus"
        assert "anthropic/claude-opus-4.6" in ids
        # A synthetic free entry was created and existing pricing kept.
        assert p["qwen/qwen3.6-plus"] == self._FREE
        assert p["anthropic/claude-opus-4.6"] == self._PAID

    def test_does_not_duplicate_curated_entries(self):
        """A Portal free model already in curated is not duplicated."""
        curated = ["qwen/qwen3.6-plus", "anthropic/claude-opus-4.6"]
        pricing = {
            "qwen/qwen3.6-plus": self._FREE,
            "anthropic/claude-opus-4.6": self._PAID,
        }

        ids, p = self._run_union(
            curated, pricing, return_value=self._payload(["qwen/qwen3.6-plus"])
        )

        assert ids == curated
        assert p == pricing

    def test_then_partition_keeps_portal_free_model(self):
        """End-to-end: Portal-flagged free model survives partition."""
        # Scenario from before the fix: the curated list already ships
        # qwen/qwen3.6-plus, the live pricing endpoint has no zero-cost
        # entry for it yet, but freeRecommendedModels flags it as free.
        curated = ["qwen/qwen3.6-plus", "anthropic/claude-opus-4.6"]
        pricing = {"anthropic/claude-opus-4.6": self._PAID}  # qwen missing!

        ids, p = self._run_union(
            curated, pricing, return_value=self._payload(["qwen/qwen3.6-plus"])
        )
        sel, unav = partition_nous_models_by_tier(ids, p, free_tier=True)

        assert "qwen/qwen3.6-plus" in sel
        assert "anthropic/claude-opus-4.6" in unav

    def test_empty_payload_returns_inputs_unchanged(self):
        """Empty Portal response leaves curated + pricing untouched."""
        curated = ["a", "b"]
        pricing = {"a": self._PAID}

        ids, p = self._run_union(curated, pricing, return_value={})

        assert ids == curated
        assert p == pricing

    def test_missing_freeRecommendedModels_key(self):
        """Portal payload without freeRecommendedModels degrades gracefully."""
        curated = ["a"]
        pricing = {"a": self._PAID}
        portal_response = {"paidRecommendedModels": [{"modelName": "x"}]}

        ids, p = self._run_union(curated, pricing, return_value=portal_response)

        assert ids == curated
        assert p == pricing

    def test_fetch_failure_returns_inputs(self):
        """Network failures don't blow up the picker."""
        curated = ["a"]
        pricing = {"a": self._PAID}

        ids, p = self._run_union(
            curated, pricing, side_effect=RuntimeError("network down")
        )

        assert ids == curated
        assert p == pricing

    def test_invalid_entries_skipped(self):
        """Non-dict / missing-modelName entries are filtered out."""
        curated = ["a"]
        pricing = {"a": self._PAID}
        portal_response = {
            "freeRecommendedModels": [
                "not-a-dict",
                {"displayName": "no-modelName"},
                {"modelName": ""},
                {"modelName": "qwen/qwen3.6-plus"},
            ]
        }

        ids, p = self._run_union(curated, pricing, return_value=portal_response)

        assert ids == ["qwen/qwen3.6-plus", "a"]
        assert p["qwen/qwen3.6-plus"] == self._FREE
class TestCheckNousFreeTierCache:
"""Tests for the TTL cache on check_nous_free_tier()."""