From e85592591e8028cceecb0ea2b4992a1643b52f93 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 11 May 2026 18:08:16 -0700 Subject: [PATCH] fix(nous): surface Portal-flagged free models in picker even when curated list is stale (#24082) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free-tier users were seeing 'No free models currently available.' in the `hermes model` and post-login pickers even though qwen/qwen3.6-plus is free on the Portal right now. Three independent breakages compounded: 1. The docs-hosted catalog manifest at website/static/api/model-catalog.json was not regenerated when _PROVIDER_MODELS['nous'] was updated, so users fetching the manifest got a list that didn't include qwen/qwen3.6-plus. 2. _resolve_nous_pricing_credentials() returned ('', '') on any auth blip, collapsing get_pricing_for_provider('nous') to {} and making every curated model fall through the free-tier filter as 'paid'. 3. Even with healthy pricing, the picker only ever showed models from the in-repo curated list intersected with live pricing — a Portal-flagged free model not yet in the curated list could never appear. Changes: - hermes_cli/models.py: new union_with_portal_free_recommendations() that augments the curated list with Portal freeRecommendedModels entries (with synthetic free pricing so partition keeps them). The Portal's /api/nous/recommended-models endpoint is now the source of truth for free-tier surfacing — old Hermes builds will see new free models without a CLI release. - hermes_cli/models.py: _resolve_nous_pricing_credentials() falls back to the public inference base URL when runtime cred resolution fails. The /v1/models endpoint exposes pricing without auth, so silently returning {} just because a refresh token expired was wrong. - hermes_cli/auth.py + hermes_cli/main.py: both free-tier picker call sites call union_with_portal_free_recommendations() before partition. 
- tests/hermes_cli/test_models.py: 7 tests covering union behaviour (prepend, dedup, end-to-end with stale pricing, empty/missing/error payloads, invalid entries). - tests/hermes_cli/test_model_catalog.py: drift guard TestManifestMatchesInRepoLists fails CI when _PROVIDER_MODELS['nous'] or OPENROUTER_MODELS is edited without re-running scripts/build_model_catalog.py. Verified empirically that removing a manifest entry triggers an assertion with an actionable error message. Validation: - 133/133 targeted tests pass (test_models, test_model_catalog, test_auth_nous_provider). - Live E2E against the real Portal: - Stale curated list ['claude-opus','claude-sonnet','gpt-5.4'] (no qwen) → after union: ['qwen/qwen3.6-plus', ...] → partition(free_tier=True): selectable=['qwen/qwen3.6-plus']. - Simulated expired refresh token → anon fetch returns a pricing map with 403 entries (a count, not an HTTP status), including qwen/qwen3.6-plus -> {prompt:0, completion:0}. - ruff: clean. --- hermes_cli/auth.py | 10 ++ hermes_cli/main.py | 34 ++++--- hermes_cli/models.py | 82 ++++++++++++++++- tests/hermes_cli/test_model_catalog.py | 55 +++++++++++ tests/hermes_cli/test_models.py | 123 +++++++++++++++++++++++++ 5 files changed, 289 insertions(+), 15 deletions(-) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 7db897cb55b..6fda05d8fd3 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -5251,6 +5251,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: from hermes_cli.models import ( get_curated_nous_model_ids, get_pricing_for_provider, check_nous_free_tier, partition_nous_models_by_tier, + union_with_portal_free_recommendations, ) model_ids = get_curated_nous_model_ids() @@ -5260,6 +5261,15 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: pricing = get_pricing_for_provider("nous") free_tier = check_nous_free_tier() if free_tier: + # The Portal's freeRecommendedModels endpoint is the + # source of truth for what's free *right now*.
Augment + # the curated list with anything new the Portal flags + # as free so users on older Hermes builds still see + # newly-launched free models without a CLI release. + _portal_for_recs = auth_state.get("portal_base_url", "") + model_ids, pricing = union_with_portal_free_recommendations( + model_ids, pricing, _portal_for_recs, + ) model_ids, unavailable_models = partition_nous_models_by_tier( model_ids, pricing, free_tier=True, ) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index f70b7ea9d95..3c0ab4c442a 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2589,6 +2589,7 @@ def _model_flow_nous(config, current_model="", args=None): get_pricing_for_provider, check_nous_free_tier, partition_nous_models_by_tier, + union_with_portal_free_recommendations, ) model_ids = get_curated_nous_model_ids() @@ -2629,19 +2630,8 @@ def _model_flow_nous(config, current_model="", args=None): # Check if user is on free tier free_tier = check_nous_free_tier() - # For free users: partition models into selectable/unavailable based on - # whether they are free per the Portal-reported pricing. - unavailable_models: list[str] = [] - if free_tier: - model_ids, unavailable_models = partition_nous_models_by_tier( - model_ids, pricing, free_tier=True - ) - - if not model_ids and not unavailable_models: - print("No models available for Nous Portal after filtering.") - return - - # Resolve portal URL for upgrade links (may differ on staging) + # Resolve portal URL early — needed both for upgrade links and for the + # freeRecommendedModels endpoint below. _nous_portal_url = "" try: _nous_state = get_provider_auth_state("nous") @@ -2650,6 +2640,24 @@ def _model_flow_nous(config, current_model="", args=None): except Exception: pass + # For free users: partition models into selectable/unavailable based on + # whether they are free per the Portal-reported pricing. 
First augment + # with the Portal's freeRecommendedModels list so newly-launched free + # models show up even if this CLI build's hardcoded curated list and + # docs-hosted manifest haven't caught up yet. + unavailable_models: list[str] = [] + if free_tier: + model_ids, pricing = union_with_portal_free_recommendations( + model_ids, pricing, _nous_portal_url, + ) + model_ids, unavailable_models = partition_nous_models_by_tier( + model_ids, pricing, free_tier=True + ) + + if not model_ids and not unavailable_models: + print("No models available for Nous Portal after filtering.") + return + if free_tier and not model_ids: print("No free models currently available.") if unavailable_models: diff --git a/hermes_cli/models.py b/hermes_cli/models.py index c23bd397e3f..813045dfd04 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -556,6 +556,71 @@ def partition_nous_models_by_tier( return (selectable, unavailable) +def union_with_portal_free_recommendations( + curated_ids: list[str], + pricing: dict[str, dict[str, str]], + portal_base_url: str = "", + *, + force_refresh: bool = False, +) -> tuple[list[str], dict[str, dict[str, str]]]: + """Augment curated list + pricing with the Portal's ``freeRecommendedModels``. + + The Portal's ``/api/nous/recommended-models`` endpoint advertises which + models are free *right now* — independent of what the in-repo + ``_PROVIDER_MODELS["nous"]`` list happens to contain or whether the + docs-hosted catalog manifest has been rebuilt since the last release. + + For free-tier users this is the source of truth: any model the Portal + flags as free should be selectable, even if the user is running an + older Hermes that doesn't ship that model in its hardcoded curated + list. This function returns an augmented ``(model_ids, pricing)`` + pair where: + + * Portal free recommendations missing from ``curated_ids`` are + prepended (so the picker shows them first).
+ * ``pricing`` gets a synthetic ``{"prompt": "0", "completion": "0"}`` + entry for any free recommendation missing from the live pricing + map, so :func:`partition_nous_models_by_tier` keeps it. + + Failures (network, parse, missing field) are silent and degrade to + returning the inputs unchanged. + """ + try: + payload = fetch_nous_recommended_models( + portal_base_url, force_refresh=force_refresh + ) + except Exception: + return (list(curated_ids), dict(pricing)) + + free_block = payload.get("freeRecommendedModels") if isinstance(payload, dict) else None + if not isinstance(free_block, list) or not free_block: + return (list(curated_ids), dict(pricing)) + + portal_free_ids: list[str] = [] + for entry in free_block: + name = _extract_model_name(entry) + if name: + portal_free_ids.append(name) + if not portal_free_ids: + return (list(curated_ids), dict(pricing)) + + augmented_pricing = dict(pricing) + free_synthetic = {"prompt": "0", "completion": "0"} + for mid in portal_free_ids: + if mid not in augmented_pricing: + augmented_pricing[mid] = dict(free_synthetic) + + augmented_ids = list(curated_ids) + seen = set(augmented_ids) + # Prepend Portal free recommendations that aren't already curated, so + # they appear first in the picker. + new_ones = [mid for mid in portal_free_ids if mid not in seen] + if new_ones: + augmented_ids = new_ones + augmented_ids + + return (augmented_ids, augmented_pricing) + + # --------------------------------------------------------------------------- # TTL cache for free-tier detection — avoids repeated API calls within a # session while still picking up upgrades quickly. 
@@ -1338,8 +1403,21 @@ def _resolve_openrouter_api_key() -> str: return os.getenv("OPENROUTER_API_KEY", "").strip() +_DEFAULT_NOUS_INFERENCE_BASE = "https://inference-api.nousresearch.com" + + def _resolve_nous_pricing_credentials() -> tuple[str, str]: - """Return ``(api_key, base_url)`` for Nous Portal pricing, or empty strings.""" + """Return ``(api_key, base_url)`` for Nous Portal pricing. + + The Nous inference ``/v1/models`` endpoint exposes pricing without + authentication, so the api_key is best-effort: when runtime credential + resolution fails (expired refresh token, missing auth.json, etc.) we + still return the default inference base URL so the picker keeps + working with anonymous pricing data. Free-tier users in particular + need this — pricing drives the free/paid partition, and silently + returning empty pricing because of an auth blip makes the picker + look broken ("No free models currently available"). + """ try: from hermes_cli.auth import resolve_nous_runtime_credentials creds = resolve_nous_runtime_credentials() @@ -1347,7 +1425,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]: return (creds.get("api_key", ""), creds.get("base_url", "")) except Exception: pass - return ("", "") + return ("", _DEFAULT_NOUS_INFERENCE_BASE) def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]: diff --git a/tests/hermes_cli/test_model_catalog.py b/tests/hermes_cli/test_model_catalog.py index 8910705c74d..d4a4b7237a8 100644 --- a/tests/hermes_cli/test_model_catalog.py +++ b/tests/hermes_cli/test_model_catalog.py @@ -328,3 +328,58 @@ class TestIntegrationWithModelsModule: "anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6", ] + + +# ----------------------------------------------------------------------------- +# Drift guard — prevent the in-repo curated lists from going out of sync with +# the docs-hosted manifest at website/static/api/model-catalog.json. 
+# +# History: qwen/qwen3.6-plus was added to _PROVIDER_MODELS["nous"] in commit +# 9dd6e5510 but website/static/api/model-catalog.json was not regenerated for +# weeks, so free-tier users on a new install fetched a stale manifest and the +# free-tier picker showed "No free models currently available." even though +# the Portal was serving qwen/qwen3.6-plus as free. CI must catch this. +# ----------------------------------------------------------------------------- + + +class TestManifestMatchesInRepoLists: + """Fail if the on-disk manifest is out of date relative to in-repo lists.""" + + @staticmethod + def _strip_volatile(catalog: dict) -> dict: + """Drop fields that always change (timestamps) for diff comparison.""" + out = dict(catalog) + out.pop("updated_at", None) + return out + + def test_in_repo_lists_match_manifest(self): + """``scripts/build_model_catalog.py`` output must match the committed file. + + If this fails, run ``python scripts/build_model_catalog.py`` and + commit the regenerated ``website/static/api/model-catalog.json``. + """ + # Resolve the repo root from this test file's location. + repo_root = Path(__file__).resolve().parents[2] + manifest_path = repo_root / "website" / "static" / "api" / "model-catalog.json" + + if not manifest_path.exists(): + pytest.skip(f"manifest missing at {manifest_path}") + + # Build expected catalog using the same script CI would. + import importlib.util + script_path = repo_root / "scripts" / "build_model_catalog.py" + spec = importlib.util.spec_from_file_location("_build_model_catalog", script_path) + mod = importlib.util.module_from_spec(spec) + assert spec.loader is not None + spec.loader.exec_module(mod) + expected = mod.build_catalog() + + with open(manifest_path, encoding="utf-8") as fh: + actual = json.load(fh) + + assert self._strip_volatile(actual) == self._strip_volatile(expected), ( + "website/static/api/model-catalog.json is out of sync with " + "_PROVIDER_MODELS['nous'] / OPENROUTER_MODELS. 
" + "Run: python scripts/build_model_catalog.py && " + "git add website/static/api/model-catalog.json" + ) diff --git a/tests/hermes_cli/test_models.py b/tests/hermes_cli/test_models.py index d0201a3e802..668105bf10d 100644 --- a/tests/hermes_cli/test_models.py +++ b/tests/hermes_cli/test_models.py @@ -6,6 +6,7 @@ from hermes_cli.models import ( OPENROUTER_MODELS, fetch_openrouter_models, model_ids, detect_provider_for_model, is_nous_free_tier, partition_nous_models_by_tier, check_nous_free_tier, _FREE_TIER_CACHE_TTL, + union_with_portal_free_recommendations, ) import hermes_cli.models as _models_mod @@ -383,6 +384,128 @@ class TestPartitionNousModelsByTier: assert unav == models +class TestUnionWithPortalFreeRecommendations: + """Tests for union_with_portal_free_recommendations. + + The Portal's freeRecommendedModels endpoint is the source of truth for + what's free *right now* — the in-repo curated list and docs-hosted + manifest can lag. This helper guarantees the picker still surfaces + Portal-flagged free models even when the rest of the catalog is stale. 
+ """ + + _PAID = {"prompt": "0.000003", "completion": "0.000015"} + _FREE = {"prompt": "0", "completion": "0"} + + def _payload(self, free_models: list[str]) -> dict: + return { + "freeRecommendedModels": [ + {"modelName": mid, "displayName": mid} for mid in free_models + ], + } + + def test_adds_portal_free_model_missing_from_curated(self): + """A Portal-advertised free model not in curated is prepended + priced free.""" + curated = ["anthropic/claude-opus-4.6"] + pricing = {"anthropic/claude-opus-4.6": self._PAID} + with patch( + "hermes_cli.models.fetch_nous_recommended_models", + return_value=self._payload(["qwen/qwen3.6-plus"]), + ): + ids, p = union_with_portal_free_recommendations(curated, pricing, "") + + assert ids[0] == "qwen/qwen3.6-plus" # prepended + assert "anthropic/claude-opus-4.6" in ids + # Synthetic free pricing entry created + assert p["qwen/qwen3.6-plus"] == self._FREE + # Existing pricing untouched + assert p["anthropic/claude-opus-4.6"] == self._PAID + + def test_does_not_duplicate_curated_entries(self): + """A Portal free model already in curated is not duplicated.""" + curated = ["qwen/qwen3.6-plus", "anthropic/claude-opus-4.6"] + pricing = { + "qwen/qwen3.6-plus": self._FREE, + "anthropic/claude-opus-4.6": self._PAID, + } + with patch( + "hermes_cli.models.fetch_nous_recommended_models", + return_value=self._payload(["qwen/qwen3.6-plus"]), + ): + ids, p = union_with_portal_free_recommendations(curated, pricing, "") + + assert ids == curated + assert p == pricing + + def test_then_partition_keeps_portal_free_model(self): + """End-to-end: Portal-flagged free model survives partition.""" + # Simulate the broken-state-before-this-fix: in-repo curated list + # contains qwen/qwen3.6-plus (because new builds shipped it) but + # live pricing endpoint hasn't published its zero-cost entry yet. + # The Portal's freeRecommendedModels still flags it as free. 
+ curated = ["qwen/qwen3.6-plus", "anthropic/claude-opus-4.6"] + pricing = {"anthropic/claude-opus-4.6": self._PAID} # qwen missing! + with patch( + "hermes_cli.models.fetch_nous_recommended_models", + return_value=self._payload(["qwen/qwen3.6-plus"]), + ): + ids, p = union_with_portal_free_recommendations(curated, pricing, "") + sel, unav = partition_nous_models_by_tier(ids, p, free_tier=True) + assert "qwen/qwen3.6-plus" in sel + assert "anthropic/claude-opus-4.6" in unav + + def test_empty_payload_returns_inputs_unchanged(self): + """Empty Portal response leaves curated + pricing untouched.""" + curated = ["a", "b"] + pricing = {"a": self._PAID} + with patch("hermes_cli.models.fetch_nous_recommended_models", return_value={}): + ids, p = union_with_portal_free_recommendations(curated, pricing, "") + assert ids == curated + assert p == pricing + + def test_missing_freeRecommendedModels_key(self): + """Portal payload without freeRecommendedModels degrades gracefully.""" + curated = ["a"] + pricing = {"a": self._PAID} + with patch( + "hermes_cli.models.fetch_nous_recommended_models", + return_value={"paidRecommendedModels": [{"modelName": "x"}]}, + ): + ids, p = union_with_portal_free_recommendations(curated, pricing, "") + assert ids == curated + assert p == pricing + + def test_fetch_failure_returns_inputs(self): + """Network failures don't blow up the picker.""" + curated = ["a"] + pricing = {"a": self._PAID} + with patch( + "hermes_cli.models.fetch_nous_recommended_models", + side_effect=RuntimeError("network down"), + ): + ids, p = union_with_portal_free_recommendations(curated, pricing, "") + assert ids == curated + assert p == pricing + + def test_invalid_entries_skipped(self): + """Non-dict / missing-modelName entries are filtered out.""" + curated = ["a"] + pricing = {"a": self._PAID} + with patch( + "hermes_cli.models.fetch_nous_recommended_models", + return_value={ + "freeRecommendedModels": [ + "not-a-dict", + {"displayName": "no-modelName"}, + 
{"modelName": ""}, + {"modelName": "qwen/qwen3.6-plus"}, + ] + }, + ): + ids, p = union_with_portal_free_recommendations(curated, pricing, "") + assert ids == ["qwen/qwen3.6-plus", "a"] + assert p["qwen/qwen3.6-plus"] == self._FREE + + class TestCheckNousFreeTierCache: """Tests for the TTL cache on check_nous_free_tier()."""