mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(nous): surface Portal-flagged free models in picker even when curated list is stale (#24082)
Free-tier users were seeing 'No free models currently available.' in the
`hermes model` and post-login pickers even though qwen/qwen3.6-plus is
free on the Portal right now. Three independent breakages compounded:
1. The docs-hosted catalog manifest at website/static/api/model-catalog.json
was not regenerated when _PROVIDER_MODELS['nous'] was updated, so users
fetching the manifest got a list that didn't include qwen/qwen3.6-plus.
2. _resolve_nous_pricing_credentials() returned ('', '') on any auth blip,
collapsing get_pricing_for_provider('nous') to {} and making every
curated model fall through the free-tier filter as 'paid'.
3. Even with healthy pricing, the picker only ever showed models from the
in-repo curated list intersected with live pricing — a Portal-flagged
free model not yet in the curated list could never appear.
Changes:
- hermes_cli/models.py: new union_with_portal_free_recommendations() that
augments the curated list with Portal freeRecommendedModels entries
(with synthetic free pricing so partition keeps them). The Portal's
/api/nous/recommended-models endpoint is now the source of truth for
free-tier surfacing — Hermes builds that include this change will pick
up newly launched free models without needing another CLI release.
- hermes_cli/models.py: _resolve_nous_pricing_credentials() falls back to
the public inference base URL when runtime cred resolution fails.
The /v1/models endpoint exposes pricing without auth, so silently
returning {} just because a refresh token expired was wrong.
- hermes_cli/auth.py + hermes_cli/main.py: both free-tier picker call
sites call union_with_portal_free_recommendations() before partition.
- tests/hermes_cli/test_models.py: 7 tests covering union behaviour
(prepend, dedup, end-to-end with stale pricing, empty/missing/error
payloads, invalid entries).
- tests/hermes_cli/test_model_catalog.py: drift guard
TestManifestMatchesInRepoLists fails CI when _PROVIDER_MODELS['nous']
or OPENROUTER_MODELS is edited without re-running
scripts/build_model_catalog.py. Verified empirically that removing a
manifest entry triggers an assertion with an actionable error message.
Validation:
- 133/133 targeted tests pass (test_models, test_model_catalog,
test_auth_nous_provider).
- Live E2E against the real Portal:
- Stale curated list ['claude-opus','claude-sonnet','gpt-5.4'] (no
qwen) → after union: ['qwen/qwen3.6-plus', ...] →
partition(free_tier=True): selectable=['qwen/qwen3.6-plus'].
- Simulated expired refresh token → anonymous fetch still returns 403
pricing entries (an entry count, not an HTTP status), including
qwen/qwen3.6-plus → {prompt:0, completion:0}.
- ruff: clean.
This commit is contained in:
parent
ced1990c1c
commit
e85592591e
5 changed files with 289 additions and 15 deletions
|
|
@ -5251,6 +5251,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
|||
from hermes_cli.models import (
|
||||
get_curated_nous_model_ids, get_pricing_for_provider,
|
||||
check_nous_free_tier, partition_nous_models_by_tier,
|
||||
union_with_portal_free_recommendations,
|
||||
)
|
||||
model_ids = get_curated_nous_model_ids()
|
||||
|
||||
|
|
@ -5260,6 +5261,15 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
|||
pricing = get_pricing_for_provider("nous")
|
||||
free_tier = check_nous_free_tier()
|
||||
if free_tier:
|
||||
# The Portal's freeRecommendedModels endpoint is the
|
||||
# source of truth for what's free *right now*. Augment
|
||||
# the curated list with anything new the Portal flags
|
||||
# as free so users on older Hermes builds still see
|
||||
# newly-launched free models without a CLI release.
|
||||
_portal_for_recs = auth_state.get("portal_base_url", "")
|
||||
model_ids, pricing = union_with_portal_free_recommendations(
|
||||
model_ids, pricing, _portal_for_recs,
|
||||
)
|
||||
model_ids, unavailable_models = partition_nous_models_by_tier(
|
||||
model_ids, pricing, free_tier=True,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -2589,6 +2589,7 @@ def _model_flow_nous(config, current_model="", args=None):
|
|||
get_pricing_for_provider,
|
||||
check_nous_free_tier,
|
||||
partition_nous_models_by_tier,
|
||||
union_with_portal_free_recommendations,
|
||||
)
|
||||
|
||||
model_ids = get_curated_nous_model_ids()
|
||||
|
|
@ -2629,19 +2630,8 @@ def _model_flow_nous(config, current_model="", args=None):
|
|||
# Check if user is on free tier
|
||||
free_tier = check_nous_free_tier()
|
||||
|
||||
# For free users: partition models into selectable/unavailable based on
|
||||
# whether they are free per the Portal-reported pricing.
|
||||
unavailable_models: list[str] = []
|
||||
if free_tier:
|
||||
model_ids, unavailable_models = partition_nous_models_by_tier(
|
||||
model_ids, pricing, free_tier=True
|
||||
)
|
||||
|
||||
if not model_ids and not unavailable_models:
|
||||
print("No models available for Nous Portal after filtering.")
|
||||
return
|
||||
|
||||
# Resolve portal URL for upgrade links (may differ on staging)
|
||||
# Resolve portal URL early — needed both for upgrade links and for the
|
||||
# freeRecommendedModels endpoint below.
|
||||
_nous_portal_url = ""
|
||||
try:
|
||||
_nous_state = get_provider_auth_state("nous")
|
||||
|
|
@ -2650,6 +2640,24 @@ def _model_flow_nous(config, current_model="", args=None):
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# For free users: partition models into selectable/unavailable based on
|
||||
# whether they are free per the Portal-reported pricing. First augment
|
||||
# with the Portal's freeRecommendedModels list so newly-launched free
|
||||
# models show up even if this CLI build's hardcoded curated list and
|
||||
# docs-hosted manifest haven't caught up yet.
|
||||
unavailable_models: list[str] = []
|
||||
if free_tier:
|
||||
model_ids, pricing = union_with_portal_free_recommendations(
|
||||
model_ids, pricing, _nous_portal_url,
|
||||
)
|
||||
model_ids, unavailable_models = partition_nous_models_by_tier(
|
||||
model_ids, pricing, free_tier=True
|
||||
)
|
||||
|
||||
if not model_ids and not unavailable_models:
|
||||
print("No models available for Nous Portal after filtering.")
|
||||
return
|
||||
|
||||
if free_tier and not model_ids:
|
||||
print("No free models currently available.")
|
||||
if unavailable_models:
|
||||
|
|
|
|||
|
|
@ -556,6 +556,71 @@ def partition_nous_models_by_tier(
|
|||
return (selectable, unavailable)
|
||||
|
||||
|
||||
def union_with_portal_free_recommendations(
    curated_ids: list[str],
    pricing: dict[str, dict[str, str]],
    portal_base_url: str = "",
    *,
    force_refresh: bool = False,
) -> tuple[list[str], dict[str, dict[str, str]]]:
    """Augment curated list + pricing with the Portal's ``freeRecommendedModels``.

    The Portal's ``/api/nous/recommended-models`` endpoint advertises which
    models are free *right now* — independent of what the in-repo
    ``_PROVIDER_MODELS["nous"]`` list happens to contain or whether the
    docs-hosted catalog manifest has been rebuilt since the last release.

    For free-tier users this is the source of truth: any model the Portal
    flags as free should be selectable, even if the user is running an
    older Hermes that doesn't ship that model in its hardcoded curated
    list.

    Args:
        curated_ids: Model ids already known to the picker, in display order.
        pricing: Live pricing map keyed by model id.
        portal_base_url: Passed through to ``fetch_nous_recommended_models``.
        force_refresh: Passed through to ``fetch_nous_recommended_models``.

    Returns:
        A new ``(model_ids, pricing)`` pair (the inputs are never mutated)
        where:

        * Portal free recommendations missing from ``curated_ids`` are
          prepended, in Portal order and without duplicates, so the picker
          shows them first.
        * ``pricing`` gets a synthetic ``{"prompt": "0", "completion": "0"}``
          entry for any free recommendation missing from the live pricing
          map, so :func:`partition_nous_models_by_tier` keeps it. Entries
          already present in ``pricing`` are left as they are.

    Failures (network, parse, missing field) are silent and degrade to
    returning copies of the inputs unchanged.
    """
    try:
        payload = fetch_nous_recommended_models(
            portal_base_url, force_refresh=force_refresh
        )
    except Exception:
        # Deliberately best-effort: a Portal outage must not break the picker.
        return (list(curated_ids), dict(pricing))

    free_block = payload.get("freeRecommendedModels") if isinstance(payload, dict) else None
    if not isinstance(free_block, list) or not free_block:
        return (list(curated_ids), dict(pricing))

    # Keep only entries that yield a model name. ``dict.fromkeys`` drops
    # repeated names while preserving the Portal's ordering, so a model the
    # Portal lists twice cannot show up as two rows in the picker.
    portal_free_ids = list(
        dict.fromkeys(
            name for name in map(_extract_model_name, free_block) if name
        )
    )
    if not portal_free_ids:
        return (list(curated_ids), dict(pricing))

    augmented_pricing = dict(pricing)
    for mid in portal_free_ids:
        # Only fill gaps; whatever the live pricing map already says wins.
        augmented_pricing.setdefault(mid, {"prompt": "0", "completion": "0"})

    # Prepend Portal free recommendations that aren't already curated, so
    # they appear first in the picker.
    already_curated = set(curated_ids)
    new_ones = [mid for mid in portal_free_ids if mid not in already_curated]

    return (new_ones + list(curated_ids), augmented_pricing)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TTL cache for free-tier detection — avoids repeated API calls within a
|
||||
# session while still picking up upgrades quickly.
|
||||
|
|
@ -1338,8 +1403,21 @@ def _resolve_openrouter_api_key() -> str:
|
|||
return os.getenv("OPENROUTER_API_KEY", "").strip()
|
||||
|
||||
|
||||
_DEFAULT_NOUS_INFERENCE_BASE = "https://inference-api.nousresearch.com"
|
||||
|
||||
|
||||
def _resolve_nous_pricing_credentials() -> tuple[str, str]:
|
||||
"""Return ``(api_key, base_url)`` for Nous Portal pricing, or empty strings."""
|
||||
"""Return ``(api_key, base_url)`` for Nous Portal pricing.
|
||||
|
||||
The Nous inference ``/v1/models`` endpoint exposes pricing without
|
||||
authentication, so the api_key is best-effort: when runtime credential
|
||||
resolution fails (expired refresh token, missing auth.json, etc.) we
|
||||
still return the default inference base URL so the picker keeps
|
||||
working with anonymous pricing data. Free-tier users in particular
|
||||
need this — pricing drives the free/paid partition, and silently
|
||||
returning empty pricing because of an auth blip makes the picker
|
||||
look broken ("No free models currently available").
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.auth import resolve_nous_runtime_credentials
|
||||
creds = resolve_nous_runtime_credentials()
|
||||
|
|
@ -1347,7 +1425,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:
|
|||
return (creds.get("api_key", ""), creds.get("base_url", ""))
|
||||
except Exception:
|
||||
pass
|
||||
return ("", "")
|
||||
return ("", _DEFAULT_NOUS_INFERENCE_BASE)
|
||||
|
||||
|
||||
def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
|
||||
|
|
|
|||
|
|
@ -328,3 +328,58 @@ class TestIntegrationWithModelsModule:
|
|||
"anthropic/claude-opus-4.7",
|
||||
"moonshotai/kimi-k2.6",
|
||||
]
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Drift guard — prevent the in-repo curated lists from going out of sync with
|
||||
# the docs-hosted manifest at website/static/api/model-catalog.json.
|
||||
#
|
||||
# History: qwen/qwen3.6-plus was added to _PROVIDER_MODELS["nous"] in commit
|
||||
# 9dd6e5510 but website/static/api/model-catalog.json was not regenerated for
|
||||
# weeks, so free-tier users on a new install fetched a stale manifest and the
|
||||
# free-tier picker showed "No free models currently available." even though
|
||||
# the Portal was serving qwen/qwen3.6-plus as free. CI must catch this.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestManifestMatchesInRepoLists:
    """Drift guard: the committed manifest must equal a freshly built catalog."""

    @staticmethod
    def _strip_volatile(catalog: dict) -> dict:
        """Return a shallow copy of *catalog* minus the ``updated_at`` timestamp."""
        return {key: value for key, value in catalog.items() if key != "updated_at"}

    def test_in_repo_lists_match_manifest(self):
        """The committed manifest must equal ``scripts/build_model_catalog.py`` output.

        On failure, regenerate with ``python scripts/build_model_catalog.py``
        and commit the updated ``website/static/api/model-catalog.json``.
        """
        # The repo root sits two directories above the directory holding this file.
        root = Path(__file__).resolve().parents[2]
        manifest_path = root / "website" / "static" / "api" / "model-catalog.json"

        if not manifest_path.exists():
            pytest.skip(f"manifest missing at {manifest_path}")

        # Load the build script by path so the comparison uses the same
        # build_catalog() the script itself exposes.
        import importlib.util

        builder_spec = importlib.util.spec_from_file_location(
            "_build_model_catalog",
            root / "scripts" / "build_model_catalog.py",
        )
        builder = importlib.util.module_from_spec(builder_spec)
        assert builder_spec.loader is not None
        builder_spec.loader.exec_module(builder)
        expected = builder.build_catalog()

        actual = json.loads(manifest_path.read_text(encoding="utf-8"))

        # Timestamps differ on every build, so compare without them.
        assert self._strip_volatile(actual) == self._strip_volatile(expected), (
            "website/static/api/model-catalog.json is out of sync with "
            "_PROVIDER_MODELS['nous'] / OPENROUTER_MODELS. "
            "Run: python scripts/build_model_catalog.py && "
            "git add website/static/api/model-catalog.json"
        )
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from hermes_cli.models import (
|
|||
OPENROUTER_MODELS, fetch_openrouter_models, model_ids, detect_provider_for_model,
|
||||
is_nous_free_tier, partition_nous_models_by_tier,
|
||||
check_nous_free_tier, _FREE_TIER_CACHE_TTL,
|
||||
union_with_portal_free_recommendations,
|
||||
)
|
||||
import hermes_cli.models as _models_mod
|
||||
|
||||
|
|
@ -383,6 +384,128 @@ class TestPartitionNousModelsByTier:
|
|||
assert unav == models
|
||||
|
||||
|
||||
class TestUnionWithPortalFreeRecommendations:
    """Behavioural checks for ``union_with_portal_free_recommendations``.

    The in-repo curated list and the docs-hosted manifest can both lag
    behind the Portal, whose freeRecommendedModels endpoint says what is
    free *right now*. These tests pin that Portal-flagged free models still
    reach the picker when the rest of the catalog is stale.
    """

    _PAID = {"prompt": "0.000003", "completion": "0.000015"}
    _FREE = {"prompt": "0", "completion": "0"}
    # Name patched in every test: the Portal fetch as looked up in hermes_cli.models.
    _FETCH = "hermes_cli.models.fetch_nous_recommended_models"

    def _payload(self, free_models: list[str]) -> dict:
        """Build a Portal-shaped payload advertising *free_models* as free."""
        entries = []
        for mid in free_models:
            entries.append({"modelName": mid, "displayName": mid})
        return {"freeRecommendedModels": entries}

    def _union(self, curated, pricing, **fetch_behaviour):
        """Call the helper with the Portal fetch patched per *fetch_behaviour*."""
        with patch(self._FETCH, **fetch_behaviour):
            return union_with_portal_free_recommendations(curated, pricing, "")

    def test_adds_portal_free_model_missing_from_curated(self):
        """A Portal-advertised free model absent from curated is prepended and priced free."""
        curated = ["anthropic/claude-opus-4.6"]
        pricing = {"anthropic/claude-opus-4.6": self._PAID}

        ids, p = self._union(
            curated, pricing, return_value=self._payload(["qwen/qwen3.6-plus"])
        )

        # Prepended ahead of the curated entry, which is still present.
        assert ids[0] == "qwen/qwen3.6-plus"
        assert "anthropic/claude-opus-4.6" in ids
        # A synthetic zero-cost entry was created for the new model...
        assert p["qwen/qwen3.6-plus"] == self._FREE
        # ...and the pre-existing pricing was left alone.
        assert p["anthropic/claude-opus-4.6"] == self._PAID

    def test_does_not_duplicate_curated_entries(self):
        """A Portal free model that is already curated is not listed twice."""
        curated = ["qwen/qwen3.6-plus", "anthropic/claude-opus-4.6"]
        pricing = {
            "qwen/qwen3.6-plus": self._FREE,
            "anthropic/claude-opus-4.6": self._PAID,
        }

        ids, p = self._union(
            curated, pricing, return_value=self._payload(["qwen/qwen3.6-plus"])
        )

        assert ids == curated
        assert p == pricing

    def test_then_partition_keeps_portal_free_model(self):
        """End-to-end: a Portal-flagged free model survives the tier partition."""
        # Scenario: the curated list already ships qwen/qwen3.6-plus, but the
        # live pricing endpoint has no zero-cost entry for it yet, while the
        # Portal's freeRecommendedModels still flags it as free.
        curated = ["qwen/qwen3.6-plus", "anthropic/claude-opus-4.6"]
        pricing = {"anthropic/claude-opus-4.6": self._PAID}  # no qwen entry

        with patch(
            self._FETCH,
            return_value=self._payload(["qwen/qwen3.6-plus"]),
        ):
            ids, p = union_with_portal_free_recommendations(curated, pricing, "")
            sel, unav = partition_nous_models_by_tier(ids, p, free_tier=True)

        assert "qwen/qwen3.6-plus" in sel
        assert "anthropic/claude-opus-4.6" in unav

    def test_empty_payload_returns_inputs_unchanged(self):
        """An empty Portal response leaves curated ids and pricing as they were."""
        curated = ["a", "b"]
        pricing = {"a": self._PAID}

        ids, p = self._union(curated, pricing, return_value={})

        assert ids == curated
        assert p == pricing

    def test_missing_freeRecommendedModels_key(self):
        """A payload lacking freeRecommendedModels degrades to a no-op."""
        curated = ["a"]
        pricing = {"a": self._PAID}

        ids, p = self._union(
            curated,
            pricing,
            return_value={"paidRecommendedModels": [{"modelName": "x"}]},
        )

        assert ids == curated
        assert p == pricing

    def test_fetch_failure_returns_inputs(self):
        """An exception from the fetch does not propagate into the picker."""
        curated = ["a"]
        pricing = {"a": self._PAID}

        ids, p = self._union(
            curated, pricing, side_effect=RuntimeError("network down")
        )

        assert ids == curated
        assert p == pricing

    def test_invalid_entries_skipped(self):
        """Entries that are not dicts or carry no usable modelName are ignored."""
        curated = ["a"]
        pricing = {"a": self._PAID}
        portal_response = {
            "freeRecommendedModels": [
                "not-a-dict",
                {"displayName": "no-modelName"},
                {"modelName": ""},
                {"modelName": "qwen/qwen3.6-plus"},
            ]
        }

        ids, p = self._union(curated, pricing, return_value=portal_response)

        assert ids == ["qwen/qwen3.6-plus", "a"]
        assert p["qwen/qwen3.6-plus"] == self._FREE
|
||||
|
||||
|
||||
class TestCheckNousFreeTierCache:
|
||||
"""Tests for the TTL cache on check_nous_free_tier()."""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue