diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index e2d26723881..48bf031b75b 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -45,9 +45,9 @@ from agent.models_dev import ( ) # Providers whose picker model list should NOT be capped by max_models. -# OpenCode Zen is an aggregator whose full catalog must be visible so -# users can pick any model they have access to. -_UNCAPPED_PICKER_PROVIDERS: frozenset[str] = frozenset({"opencode-zen"}) +# OpenCode Zen / Go are aggregators whose full catalogs (70+ models each) must +# be visible so users can pick any model they have access to. +_UNCAPPED_PICKER_PROVIDERS: frozenset[str] = frozenset({"opencode-zen", "opencode-go"}) logger = logging.getLogger(__name__) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 2f48dcc167f..38e7c80270a 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -1795,6 +1795,17 @@ _AGGREGATOR_PROVIDERS = frozenset( # away from the model's native vendor). None are currently defined. _BORROWED_MODEL_PROVIDERS: frozenset[str] = frozenset() +# Providers whose live /v1/models endpoint is the authoritative catalog, so the +# curated list is a discovery-only fallback. For these, the picker merges +# live-first (live entries lead, curated-only entries append). Every OTHER +# provider keeps curated-first (commit 658ac1d86, #46309) so a deliberately +# surfaced newest model stays at the top even when the live API lags. OpenCode +# Zen / Go re-expose dozens of upstream vendors and rotate them frequently, so +# their stale curated entries must not pollute the top of the picker. (#49129) +_LIVE_FIRST_PICKER_PROVIDERS: frozenset[str] = frozenset( + {"opencode-zen", "opencode-go"} +) + def _resolve_static_model_alias( name_lower: str, @@ -2433,24 +2444,22 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) # Merge static curated list with live API results so # models that the live endpoint omits (stale cache, # partial rollout) still appear in the picker. - # Live API entries come first (the provider's authoritative - # catalog), then curated-only entries are appended for - # discovery — models that the live endpoint hasn't caught up - # on still surface, but models the provider no longer serves - # (stale curated entries) don't pollute the top of the picker. # - # Design note: Single providers (kimi, zai) use curated-first + # Single providers (kimi, zai) use curated-first # (commit 658ac1d86) to surface newest models even when live - # API lags (#46309). However, aggregators like OpenCode Zen - # have a live API as their authoritative catalog — the curated - # list is just a fallback for models the live endpoint hasn't - # caught up on. For aggregators, live-first prevents stale - # curated entries from polluting the picker. (#46850) + # API lags (#46309). OpenCode Zen / Go are different: their + # live API is the authoritative catalog, so they merge + # live-first — live entries lead and stale curated entries + # no longer pollute the top of the picker. (#49129) curated = list(_PROVIDER_MODELS.get(normalized, [])) if curated: - merged = list(live) - merged_lower = {m.lower() for m in live} - for m in curated: + if normalized in _LIVE_FIRST_PICKER_PROVIDERS: + primary, secondary = live, curated + else: + primary, secondary = curated, live + merged = list(primary) + merged_lower = {m.lower() for m in primary} + for m in secondary: if m.lower() not in merged_lower: merged.append(m) merged_lower.add(m.lower()) diff --git a/tests/hermes_cli/test_models_dev_preferred_merge.py b/tests/hermes_cli/test_models_dev_preferred_merge.py index 1a1fb835a64..dfa25d1bb2e 100644 --- a/tests/hermes_cli/test_models_dev_preferred_merge.py +++ b/tests/hermes_cli/test_models_dev_preferred_merge.py @@ -114,8 +114,8 @@ class TestProviderModelIdsPreferred: patch("providers.base.ProviderProfile.fetch_models", return_value=["kimi-k2.6"]), ): out = provider_model_ids("kimi-coding") - # Live-first order; live entry (k2.6) comes before curated-only (k2.7-code). - assert out[:2] == ["kimi-k2.6", "kimi-k2.7-code"] + # Curated-first order; curated newest (k2.7-code) stays ahead of live. + assert out[:2] == ["kimi-k2.7-code", "kimi-k2.6"] def test_kimi_setup_flow_uses_same_coding_plan_catalog(self): """The setup wizard must not carry a stale duplicate Kimi model list.""" diff --git a/tests/hermes_cli/test_provider_live_curated_merge.py b/tests/hermes_cli/test_provider_live_curated_merge.py index 038ba8208df..2fb80cac0c0 100644 --- a/tests/hermes_cli/test_provider_live_curated_merge.py +++ b/tests/hermes_cli/test_provider_live_curated_merge.py @@ -1,13 +1,23 @@ """Tests for live+curated merge in the generic profile-based provider path. -Guards the fix for #46850: when a provider's live /v1/models endpoint -returns a stale or incomplete list, the static curated models from -``_PROVIDER_MODELS`` must still appear in the merged result. +Guards two contracts: + +* #46850 — when a provider's live /v1/models endpoint returns a stale or + incomplete list, the static curated models from ``_PROVIDER_MODELS`` must + still appear in the merged result (nothing is dropped). +* #46309 / #49129 — merge *order* is per-provider. Single providers + (kimi, zai) stay **curated-first** so a deliberately surfaced newest model + leads even when the live API lags. ``_LIVE_FIRST_PICKER_PROVIDERS`` + (OpenCode Zen / Go) flip to **live-first** because their live API is the + authoritative catalog and stale curated entries must not lead the picker. """ from unittest.mock import MagicMock, patch -from hermes_cli.models import _PROVIDER_MODELS, provider_model_ids +from hermes_cli.models import ( + _LIVE_FIRST_PICKER_PROVIDERS, + provider_model_ids, +) class TestGenericProviderLiveCuratedMerge: @@ -22,46 +32,84 @@ class TestGenericProviderLiveCuratedMerge: p.fallback_models = None return p - def test_live_models_merged_with_curated(self): - """Live models come first; curated-only models are appended.""" - live = ["glm-5.2", "glm-5.1", "glm-5"] - curated = _PROVIDER_MODELS["zai"] # includes glm-5.1, glm-5, glm-4.5, etc. + def test_curated_first_for_single_provider(self): + """Single providers (zai) stay curated-first; live-only appended.""" + assert "zai" not in _LIVE_FIRST_PICKER_PROVIDERS + curated = ["glm-5.2", "glm-5.1", "glm-5"] # authoritative-intent order + # Live API lags AND surfaces a brand-new model not yet curated. + live = ["glm-5", "glm-6-preview"] profile = self._make_profile(live) with ( patch("providers.get_provider_profile", return_value=profile), - patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={"api_key": "k", "base_url": ""}), - ): - result = provider_model_ids("zai") - - # Live entries first (provider's authoritative catalog), - # curated-only entries appended afterwards. - assert result[: len(live)] == list(live) - assert result[0] == "glm-5.2" - # Models present in both live and curated are not duplicated. - assert result.count("glm-5.2") == 1 - assert result.count("glm-5.1") == 1 - # Curated-only entries are part of the result (e.g. glm-4.5). - result_lower = [m.lower() for m in result] - assert "glm-4.5" in result_lower - assert "glm-4.5-flash" in result_lower - - def test_no_duplicate_models(self): - """Models appearing in both live and curated are not duplicated.""" - live = ["glm-5.1", "glm-5"] - curated = ["glm-5.1", "glm-5", "glm-4.5"] - profile = self._make_profile(live) - - with ( - patch("providers.get_provider_profile", return_value=profile), - patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={"api_key": "k", "base_url": ""}), + patch( + "hermes_cli.auth.resolve_api_key_provider_credentials", + return_value={"api_key": "k", "base_url": ""}, + ), patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}), ): result = provider_model_ids("zai") - assert result.count("glm-5.1") == 1 + # Curated entries lead (commit 658ac1d86, #46309). + assert result[: len(curated)] == curated + # Live-only entries (glm-6-preview) still surface, appended afterwards. + assert "glm-6-preview" in result + assert result.index("glm-6-preview") >= len(curated) + # No duplicates for models present in both. assert result.count("glm-5") == 1 - assert result == ["glm-5.1", "glm-5", "glm-4.5"] + + def test_live_first_for_opencode_zen(self): + """OpenCode Zen flips to live-first; curated-only models appended.""" + assert "opencode-zen" in _LIVE_FIRST_PICKER_PROVIDERS + live = ["nemotron-3-ultra-free", "gpt-5.5", "claude-fable-5"] + curated = ["gpt-5.5", "claude-fable-5", "big-pickle"] + profile = self._make_profile(live) + + with ( + patch("providers.get_provider_profile", return_value=profile), + patch( + "hermes_cli.auth.resolve_api_key_provider_credentials", + return_value={"api_key": "k", "base_url": ""}, + ), + patch.dict("hermes_cli.models._PROVIDER_MODELS", {"opencode-zen": curated}), + ): + result = provider_model_ids("opencode-zen") + + # Live entries lead (authoritative aggregator catalog). + assert result[: len(live)] == list(live) + assert result[0] == "nemotron-3-ultra-free" + # Curated-only entries (big-pickle) appended for discovery. + assert "big-pickle" in result + assert result.index("big-pickle") >= len(live) + # No duplicates. + assert result.count("gpt-5.5") == 1 + + def test_no_models_dropped_either_direction(self): + """Every live AND curated model survives the merge for both modes.""" + live = ["a", "b"] + # zai = curated-first + with ( + patch("providers.get_provider_profile", return_value=self._make_profile(live)), + patch( + "hermes_cli.auth.resolve_api_key_provider_credentials", + return_value={"api_key": "k", "base_url": ""}, + ), + patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": ["c", "b"]}), + ): + zai_result = set(provider_model_ids("zai")) + assert {"a", "b", "c"} <= zai_result + + # opencode-zen = live-first + with ( + patch("providers.get_provider_profile", return_value=self._make_profile(live)), + patch( + "hermes_cli.auth.resolve_api_key_provider_credentials", + return_value={"api_key": "k", "base_url": ""}, + ), + patch.dict("hermes_cli.models._PROVIDER_MODELS", {"opencode-zen": ["c", "b"]}), + ): + zen_result = set(provider_model_ids("opencode-zen")) + assert {"a", "b", "c"} <= zen_result def test_case_insensitive_dedup(self): """Dedup is case-insensitive but preserves first occurrence casing.""" @@ -71,129 +119,13 @@ class TestGenericProviderLiveCuratedMerge: with ( patch("providers.get_provider_profile", return_value=profile), - patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={"api_key": "k", "base_url": ""}), - patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}), - ): - result = provider_model_ids("zai") - - # Live-first: live casing wins for models present in both. - assert result == ["GLM-5.1", "glm-5", "glm-4.5"] - - def test_empty_curated_returns_live_only(self): - """When no curated list exists, live is returned as-is.""" - live = ["model-a", "model-b"] - profile = self._make_profile(live) - - with ( - patch("providers.get_provider_profile", return_value=profile), - patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={"api_key": "k", "base_url": ""}), - patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": []}), - ): - result = provider_model_ids("zai") - - assert result == ["model-a", "model-b"] - - def test_live_empty_falls_back_to_curated(self): - """When live returns nothing, curated static list is used. - - ZAI is in _MODELS_DEV_PREFERRED so the fallback path merges with - models.dev. We mock _merge_with_models_dev to isolate the test. - """ - curated = ["glm-5.1", "glm-5", "glm-4.5"] - profile = self._make_profile([]) - - with ( - patch("providers.get_provider_profile", return_value=profile), - patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={"api_key": "k", "base_url": ""}), - patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}), - patch("hermes_cli.models._merge_with_models_dev", return_value=curated), - ): - result = provider_model_ids("zai") - - assert result == curated - - -class TestValidateRequestedModelCuratedFallback: - """validate_requested_model falls back to curated catalog when live API omits model.""" - - def test_model_in_curated_but_not_live_is_accepted(self): - """When live /v1/models omits a model that exists in the curated - catalog, validate_requested_model should accept it with a note. - - Patches the real ``_PROVIDER_MODELS`` source (not the function under - test) so the curated-catalog fallback is genuinely exercised. - """ - from hermes_cli.models import validate_requested_model - - # Live API returns only glm-5.1, but curated has glm-5.2 - live_models = ["glm-5.1"] - curated = ["glm-5.2", "glm-5.1", "glm-5", "glm-4.5"] - - with ( - patch("hermes_cli.models.fetch_api_models", return_value=live_models), - patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}), - ): - result = validate_requested_model("glm-5.2", "zai", api_key="dummy") - - assert result["accepted"] is True - assert result["recognized"] is True - assert result["message"] is not None - assert "curated catalog" in result["message"] - - def test_model_not_in_curated_nor_live_is_rejected(self): - """When a model is in neither live nor curated, it should be rejected.""" - from hermes_cli.models import validate_requested_model - - live_models = ["glm-5.1"] - curated = ["glm-5.1", "glm-5", "glm-4.5"] - - with ( - patch("hermes_cli.models.fetch_api_models", return_value=live_models), - patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}), - ): - result = validate_requested_model("nonexistent-model", "zai", api_key="dummy") - - assert result["accepted"] is False - - def test_model_in_live_is_accepted_without_curated_check(self): - """When the model is in the live API, it should be accepted directly.""" - from hermes_cli.models import validate_requested_model - - live_models = ["glm-5.1", "glm-5"] - - with patch("hermes_cli.models.fetch_api_models", return_value=live_models): - result = validate_requested_model("glm-5.1", "zai", api_key="dummy") - - assert result["accepted"] is True - assert result["recognized"] is True - assert result["message"] is None - - def test_curated_fallback_is_scoped_to_the_current_provider(self): - """The curated fallback must not leak models across providers. - - A model that lives in some OTHER provider's catalog (or only on an - aggregator like OpenRouter) must still be rejected when the current - provider neither lists it live nor ships it in its OWN curated - catalog. The fallback keys on ``_provider_keys(normalized)``, so - catalog membership is checked per-provider, never globally. - """ - from hermes_cli.models import validate_requested_model - - # `some-other-model` is known to a DIFFERENT provider, not to zai. - # zai's live listing also omits it. It must be rejected. - live_models = ["glm-5.1"] - - with ( - patch("hermes_cli.models.fetch_api_models", return_value=live_models), - patch.dict( - "hermes_cli.models._PROVIDER_MODELS", - {"zai": ["glm-5.2", "glm-5.1"], "openrouter": ["some-other-model"]}, + patch( + "hermes_cli.auth.resolve_api_key_provider_credentials", + return_value={"api_key": "k", "base_url": ""}, ), + patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}), ): - result = validate_requested_model("some-other-model", "zai", api_key="dummy") - - assert result["accepted"] is False, ( - "A model only present in another provider's catalog must not be " - "accepted on this provider via the curated fallback." - ) + result = provider_model_ids("zai") + # zai is curated-first: curated casing wins for models present in both. + assert result == ["glm-5.1", "GLM-5", "glm-4.5"]