fix(nous): surface Portal-flagged free models in picker even when curated list is stale (#24082)

Free-tier users were seeing 'No free models currently available.' in the
`hermes model` and post-login pickers even though qwen/qwen3.6-plus is
free on the Portal right now. Three independent breakages compounded:

1. The docs-hosted catalog manifest at website/static/api/model-catalog.json
   was not regenerated when _PROVIDER_MODELS['nous'] was updated, so users
   fetching the manifest got a list that didn't include qwen/qwen3.6-plus.
2. _resolve_nous_pricing_credentials() returned ('', '') on any auth blip,
   collapsing get_pricing_for_provider('nous') to {} and making every
   curated model fall through the free-tier filter as 'paid'.
3. Even with healthy pricing, the picker only ever showed models from the
   in-repo curated list intersected with live pricing — a Portal-flagged
   free model not yet in the curated list could never appear.

Changes:
- hermes_cli/models.py: new union_with_portal_free_recommendations() that
  augments the curated list with Portal freeRecommendedModels entries
  (with synthetic free pricing so partition keeps them). The Portal's
  /api/nous/recommended-models endpoint is now the source of truth for
  free-tier surfacing — old Hermes builds will see new free models
  without a CLI release.
- hermes_cli/models.py: _resolve_nous_pricing_credentials() falls back to
  the public inference base URL when runtime cred resolution fails.
  The /v1/models endpoint exposes pricing without auth, so silently
  returning {} just because a refresh token expired was wrong.
- hermes_cli/auth.py + hermes_cli/main.py: both free-tier picker call
  sites call union_with_portal_free_recommendations() before partition.
- tests/hermes_cli/test_models.py: 7 tests covering union behaviour
  (prepend, dedup, end-to-end with stale pricing, empty/missing/error
  payloads, invalid entries).
- tests/hermes_cli/test_model_catalog.py: drift guard
  TestManifestMatchesInRepoLists fails CI when _PROVIDER_MODELS['nous']
  or OPENROUTER_MODELS is edited without re-running
  scripts/build_model_catalog.py. Verified empirically that removing a
  manifest entry triggers an assertion with an actionable error message.

Validation:
- 133/133 targeted tests pass (test_models, test_model_catalog,
  test_auth_nous_provider).
- Live E2E against the real Portal:
  - Stale curated list ['claude-opus','claude-sonnet','gpt-5.4'] (no
    qwen) → after union: ['qwen/qwen3.6-plus', ...] →
    partition(free_tier=True): selectable=['qwen/qwen3.6-plus'].
  - Simulated expired refresh token → anonymous fetch still succeeds and
    returns 403 pricing entries (a count, not an HTTP 403 status),
    including qwen/qwen3.6-plus → {prompt:0, completion:0}.
- ruff: clean.
This commit is contained in:
Teknium 2026-05-11 18:08:16 -07:00 committed by GitHub
parent ced1990c1c
commit e85592591e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 289 additions and 15 deletions

View file

@ -328,3 +328,58 @@ class TestIntegrationWithModelsModule:
"anthropic/claude-opus-4.7",
"moonshotai/kimi-k2.6",
]
# -----------------------------------------------------------------------------
# Drift guard — prevent the in-repo curated lists from going out of sync with
# the docs-hosted manifest at website/static/api/model-catalog.json.
#
# History: qwen/qwen3.6-plus was added to _PROVIDER_MODELS["nous"] in commit
# 9dd6e5510 but website/static/api/model-catalog.json was not regenerated for
# weeks, so free-tier users on a new install fetched a stale manifest and the
# free-tier picker showed "No free models currently available." even though
# the Portal was serving qwen/qwen3.6-plus as free. CI must catch this.
# -----------------------------------------------------------------------------
class TestManifestMatchesInRepoLists:
"""Fail if the on-disk manifest is out of date relative to in-repo lists."""
@staticmethod
def _strip_volatile(catalog: dict) -> dict:
"""Drop fields that always change (timestamps) for diff comparison."""
out = dict(catalog)
out.pop("updated_at", None)
return out
def test_in_repo_lists_match_manifest(self):
"""``scripts/build_model_catalog.py`` output must match the committed file.
If this fails, run ``python scripts/build_model_catalog.py`` and
commit the regenerated ``website/static/api/model-catalog.json``.
"""
# Resolve the repo root from this test file's location.
repo_root = Path(__file__).resolve().parents[2]
manifest_path = repo_root / "website" / "static" / "api" / "model-catalog.json"
if not manifest_path.exists():
pytest.skip(f"manifest missing at {manifest_path}")
# Build expected catalog using the same script CI would.
import importlib.util
script_path = repo_root / "scripts" / "build_model_catalog.py"
spec = importlib.util.spec_from_file_location("_build_model_catalog", script_path)
mod = importlib.util.module_from_spec(spec)
assert spec.loader is not None
spec.loader.exec_module(mod)
expected = mod.build_catalog()
with open(manifest_path, encoding="utf-8") as fh:
actual = json.load(fh)
assert self._strip_volatile(actual) == self._strip_volatile(expected), (
"website/static/api/model-catalog.json is out of sync with "
"_PROVIDER_MODELS['nous'] / OPENROUTER_MODELS. "
"Run: python scripts/build_model_catalog.py && "
"git add website/static/api/model-catalog.json"
)

View file

@ -6,6 +6,7 @@ from hermes_cli.models import (
OPENROUTER_MODELS, fetch_openrouter_models, model_ids, detect_provider_for_model,
is_nous_free_tier, partition_nous_models_by_tier,
check_nous_free_tier, _FREE_TIER_CACHE_TTL,
union_with_portal_free_recommendations,
)
import hermes_cli.models as _models_mod
@ -383,6 +384,128 @@ class TestPartitionNousModelsByTier:
assert unav == models
class TestUnionWithPortalFreeRecommendations:
"""Tests for union_with_portal_free_recommendations.
The Portal's freeRecommendedModels endpoint is the source of truth for
what's free *right now* — the in-repo curated list and docs-hosted
manifest can lag. This helper guarantees the picker still surfaces
Portal-flagged free models even when the rest of the catalog is stale.
"""
_PAID = {"prompt": "0.000003", "completion": "0.000015"}
_FREE = {"prompt": "0", "completion": "0"}
def _payload(self, free_models: list[str]) -> dict:
return {
"freeRecommendedModels": [
{"modelName": mid, "displayName": mid} for mid in free_models
],
}
def test_adds_portal_free_model_missing_from_curated(self):
"""A Portal-advertised free model not in curated is prepended + priced free."""
curated = ["anthropic/claude-opus-4.6"]
pricing = {"anthropic/claude-opus-4.6": self._PAID}
with patch(
"hermes_cli.models.fetch_nous_recommended_models",
return_value=self._payload(["qwen/qwen3.6-plus"]),
):
ids, p = union_with_portal_free_recommendations(curated, pricing, "")
assert ids[0] == "qwen/qwen3.6-plus" # prepended
assert "anthropic/claude-opus-4.6" in ids
# Synthetic free pricing entry created
assert p["qwen/qwen3.6-plus"] == self._FREE
# Existing pricing untouched
assert p["anthropic/claude-opus-4.6"] == self._PAID
def test_does_not_duplicate_curated_entries(self):
"""A Portal free model already in curated is not duplicated."""
curated = ["qwen/qwen3.6-plus", "anthropic/claude-opus-4.6"]
pricing = {
"qwen/qwen3.6-plus": self._FREE,
"anthropic/claude-opus-4.6": self._PAID,
}
with patch(
"hermes_cli.models.fetch_nous_recommended_models",
return_value=self._payload(["qwen/qwen3.6-plus"]),
):
ids, p = union_with_portal_free_recommendations(curated, pricing, "")
assert ids == curated
assert p == pricing
def test_then_partition_keeps_portal_free_model(self):
"""End-to-end: Portal-flagged free model survives partition."""
# Simulate the broken-state-before-this-fix: in-repo curated list
# contains qwen/qwen3.6-plus (because new builds shipped it) but
# live pricing endpoint hasn't published its zero-cost entry yet.
# The Portal's freeRecommendedModels still flags it as free.
curated = ["qwen/qwen3.6-plus", "anthropic/claude-opus-4.6"]
pricing = {"anthropic/claude-opus-4.6": self._PAID} # qwen missing!
with patch(
"hermes_cli.models.fetch_nous_recommended_models",
return_value=self._payload(["qwen/qwen3.6-plus"]),
):
ids, p = union_with_portal_free_recommendations(curated, pricing, "")
sel, unav = partition_nous_models_by_tier(ids, p, free_tier=True)
assert "qwen/qwen3.6-plus" in sel
assert "anthropic/claude-opus-4.6" in unav
def test_empty_payload_returns_inputs_unchanged(self):
"""Empty Portal response leaves curated + pricing untouched."""
curated = ["a", "b"]
pricing = {"a": self._PAID}
with patch("hermes_cli.models.fetch_nous_recommended_models", return_value={}):
ids, p = union_with_portal_free_recommendations(curated, pricing, "")
assert ids == curated
assert p == pricing
def test_missing_freeRecommendedModels_key(self):
"""Portal payload without freeRecommendedModels degrades gracefully."""
curated = ["a"]
pricing = {"a": self._PAID}
with patch(
"hermes_cli.models.fetch_nous_recommended_models",
return_value={"paidRecommendedModels": [{"modelName": "x"}]},
):
ids, p = union_with_portal_free_recommendations(curated, pricing, "")
assert ids == curated
assert p == pricing
def test_fetch_failure_returns_inputs(self):
"""Network failures don't blow up the picker."""
curated = ["a"]
pricing = {"a": self._PAID}
with patch(
"hermes_cli.models.fetch_nous_recommended_models",
side_effect=RuntimeError("network down"),
):
ids, p = union_with_portal_free_recommendations(curated, pricing, "")
assert ids == curated
assert p == pricing
def test_invalid_entries_skipped(self):
"""Non-dict / missing-modelName entries are filtered out."""
curated = ["a"]
pricing = {"a": self._PAID}
with patch(
"hermes_cli.models.fetch_nous_recommended_models",
return_value={
"freeRecommendedModels": [
"not-a-dict",
{"displayName": "no-modelName"},
{"modelName": ""},
{"modelName": "qwen/qwen3.6-plus"},
]
},
):
ids, p = union_with_portal_free_recommendations(curated, pricing, "")
assert ids == ["qwen/qwen3.6-plus", "a"]
assert p["qwen/qwen3.6-plus"] == self._FREE
class TestCheckNousFreeTierCache:
"""Tests for the TTL cache on check_nous_free_tier()."""