fix(models): scope live-first picker merge to opencode aggregators only

Follow-up to the salvaged #49129 commit. The original change flipped the
shared generic-provider merge in provider_model_ids() to live-first
unconditionally, which regressed curated-first for single providers
(kimi/zai, #46309) — and the PR encoded that regression by flipping the
kimi-coding and zai test assertions to expect live-first.

Gate live-first on an explicit _LIVE_FIRST_PICKER_PROVIDERS set
({opencode-zen, opencode-go}); every other provider keeps curated-first.
Also widen the uncapped picker + live-first sets to opencode-go, which has
the same 70+ model catalog problem as opencode-zen. Restore the
kimi-coding curated-first test and rewrite the merge-order test to assert
the per-provider contract.
This commit is contained in:
teknium1 2026-06-27 21:02:09 -07:00 committed by Teknium
parent f98ffbc246
commit f54c52800a
4 changed files with 118 additions and 177 deletions

View file

@ -45,9 +45,9 @@ from agent.models_dev import (
)
# Providers whose picker model list should NOT be capped by max_models.
# OpenCode Zen is an aggregator whose full catalog must be visible so
# users can pick any model they have access to.
_UNCAPPED_PICKER_PROVIDERS: frozenset[str] = frozenset({"opencode-zen"})
# OpenCode Zen / Go are aggregators whose full catalogs (70+ models each) must
# be visible so users can pick any model they have access to.
_UNCAPPED_PICKER_PROVIDERS: frozenset[str] = frozenset({"opencode-zen", "opencode-go"})
logger = logging.getLogger(__name__)

View file

@ -1795,6 +1795,17 @@ _AGGREGATOR_PROVIDERS = frozenset(
# away from the model's native vendor). None are currently defined.
_BORROWED_MODEL_PROVIDERS: frozenset[str] = frozenset()
# Providers whose live /v1/models endpoint is the authoritative catalog, so the
# curated list is a discovery-only fallback. For these, the picker merges
# live-first (live entries lead, curated-only entries append). Every OTHER
# provider keeps curated-first (commit 658ac1d86, #46309) so a deliberately
# surfaced newest model stays at the top even when the live API lags. OpenCode
# Zen / Go re-expose dozens of upstream vendors and rotate them frequently, so
# their stale curated entries must not pollute the top of the picker. (#49129)
_LIVE_FIRST_PICKER_PROVIDERS: frozenset[str] = frozenset(
{"opencode-zen", "opencode-go"}
)
def _resolve_static_model_alias(
name_lower: str,
@ -2433,24 +2444,22 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
# Merge static curated list with live API results so
# models that the live endpoint omits (stale cache,
# partial rollout) still appear in the picker.
# Live API entries come first (the provider's authoritative
# catalog), then curated-only entries are appended for
# discovery — models that the live endpoint hasn't caught up
# on still surface, but models the provider no longer serves
# (stale curated entries) don't pollute the top of the picker.
#
# Design note: Single providers (kimi, zai) use curated-first
# Single providers (kimi, zai) use curated-first
# (commit 658ac1d86) to surface newest models even when live
# API lags (#46309). However, aggregators like OpenCode Zen
# have a live API as their authoritative catalog — the curated
# list is just a fallback for models the live endpoint hasn't
# caught up on. For aggregators, live-first prevents stale
# curated entries from polluting the picker. (#46850)
# API lags (#46309). OpenCode Zen / Go are different: their
# live API is the authoritative catalog, so they merge
# live-first — live entries lead and stale curated entries
# no longer pollute the top of the picker. (#49129)
curated = list(_PROVIDER_MODELS.get(normalized, []))
if curated:
merged = list(live)
merged_lower = {m.lower() for m in live}
for m in curated:
if normalized in _LIVE_FIRST_PICKER_PROVIDERS:
primary, secondary = live, curated
else:
primary, secondary = curated, live
merged = list(primary)
merged_lower = {m.lower() for m in primary}
for m in secondary:
if m.lower() not in merged_lower:
merged.append(m)
merged_lower.add(m.lower())

View file

@ -114,8 +114,8 @@ class TestProviderModelIdsPreferred:
patch("providers.base.ProviderProfile.fetch_models", return_value=["kimi-k2.6"]),
):
out = provider_model_ids("kimi-coding")
# Live-first order; live entry (k2.6) comes before curated-only (k2.7-code).
assert out[:2] == ["kimi-k2.6", "kimi-k2.7-code"]
# Curated-first order; curated newest (k2.7-code) stays ahead of live.
assert out[:2] == ["kimi-k2.7-code", "kimi-k2.6"]
def test_kimi_setup_flow_uses_same_coding_plan_catalog(self):
"""The setup wizard must not carry a stale duplicate Kimi model list."""

View file

@ -1,13 +1,23 @@
"""Tests for live+curated merge in the generic profile-based provider path.
Guards the fix for #46850: when a provider's live /v1/models endpoint
returns a stale or incomplete list, the static curated models from
``_PROVIDER_MODELS`` must still appear in the merged result.
Guards two contracts:
* #46850 — when a provider's live /v1/models endpoint returns a stale or
incomplete list, the static curated models from ``_PROVIDER_MODELS`` must
still appear in the merged result (nothing is dropped).
* #46309 / #49129 — merge *order* is per-provider. Single providers
(kimi, zai) stay **curated-first** so a deliberately surfaced newest model
leads even when the live API lags. ``_LIVE_FIRST_PICKER_PROVIDERS``
(OpenCode Zen / Go) flip to **live-first** because their live API is the
authoritative catalog and stale curated entries must not lead the picker.
"""
from unittest.mock import MagicMock, patch
from hermes_cli.models import _PROVIDER_MODELS, provider_model_ids
from hermes_cli.models import (
_LIVE_FIRST_PICKER_PROVIDERS,
provider_model_ids,
)
class TestGenericProviderLiveCuratedMerge:
@ -22,46 +32,84 @@ class TestGenericProviderLiveCuratedMerge:
p.fallback_models = None
return p
def test_live_models_merged_with_curated(self):
"""Live models come first; curated-only models are appended."""
live = ["glm-5.2", "glm-5.1", "glm-5"]
curated = _PROVIDER_MODELS["zai"] # includes glm-5.1, glm-5, glm-4.5, etc.
def test_curated_first_for_single_provider(self):
"""Single providers (zai) stay curated-first; live-only appended."""
assert "zai" not in _LIVE_FIRST_PICKER_PROVIDERS
curated = ["glm-5.2", "glm-5.1", "glm-5"] # authoritative-intent order
# Live API lags AND surfaces a brand-new model not yet curated.
live = ["glm-5", "glm-6-preview"]
profile = self._make_profile(live)
with (
patch("providers.get_provider_profile", return_value=profile),
patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={"api_key": "k", "base_url": ""}),
):
result = provider_model_ids("zai")
# Live entries first (provider's authoritative catalog),
# curated-only entries appended afterwards.
assert result[: len(live)] == list(live)
assert result[0] == "glm-5.2"
# Models present in both live and curated are not duplicated.
assert result.count("glm-5.2") == 1
assert result.count("glm-5.1") == 1
# Curated-only entries are part of the result (e.g. glm-4.5).
result_lower = [m.lower() for m in result]
assert "glm-4.5" in result_lower
assert "glm-4.5-flash" in result_lower
def test_no_duplicate_models(self):
"""Models appearing in both live and curated are not duplicated."""
live = ["glm-5.1", "glm-5"]
curated = ["glm-5.1", "glm-5", "glm-4.5"]
profile = self._make_profile(live)
with (
patch("providers.get_provider_profile", return_value=profile),
patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={"api_key": "k", "base_url": ""}),
patch(
"hermes_cli.auth.resolve_api_key_provider_credentials",
return_value={"api_key": "k", "base_url": ""},
),
patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}),
):
result = provider_model_ids("zai")
assert result.count("glm-5.1") == 1
# Curated entries lead (commit 658ac1d86, #46309).
assert result[: len(curated)] == curated
# Live-only entries (glm-6-preview) still surface, appended afterwards.
assert "glm-6-preview" in result
assert result.index("glm-6-preview") >= len(curated)
# No duplicates for models present in both.
assert result.count("glm-5") == 1
assert result == ["glm-5.1", "glm-5", "glm-4.5"]
def test_live_first_for_opencode_zen(self):
"""OpenCode Zen flips to live-first; curated-only models appended."""
assert "opencode-zen" in _LIVE_FIRST_PICKER_PROVIDERS
live = ["nemotron-3-ultra-free", "gpt-5.5", "claude-fable-5"]
curated = ["gpt-5.5", "claude-fable-5", "big-pickle"]
profile = self._make_profile(live)
with (
patch("providers.get_provider_profile", return_value=profile),
patch(
"hermes_cli.auth.resolve_api_key_provider_credentials",
return_value={"api_key": "k", "base_url": ""},
),
patch.dict("hermes_cli.models._PROVIDER_MODELS", {"opencode-zen": curated}),
):
result = provider_model_ids("opencode-zen")
# Live entries lead (authoritative aggregator catalog).
assert result[: len(live)] == list(live)
assert result[0] == "nemotron-3-ultra-free"
# Curated-only entries (big-pickle) appended for discovery.
assert "big-pickle" in result
assert result.index("big-pickle") >= len(live)
# No duplicates.
assert result.count("gpt-5.5") == 1
def test_no_models_dropped_either_direction(self):
"""Every live AND curated model survives the merge for both modes."""
live = ["a", "b"]
# zai = curated-first
with (
patch("providers.get_provider_profile", return_value=self._make_profile(live)),
patch(
"hermes_cli.auth.resolve_api_key_provider_credentials",
return_value={"api_key": "k", "base_url": ""},
),
patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": ["c", "b"]}),
):
zai_result = set(provider_model_ids("zai"))
assert {"a", "b", "c"} <= zai_result
# opencode-zen = live-first
with (
patch("providers.get_provider_profile", return_value=self._make_profile(live)),
patch(
"hermes_cli.auth.resolve_api_key_provider_credentials",
return_value={"api_key": "k", "base_url": ""},
),
patch.dict("hermes_cli.models._PROVIDER_MODELS", {"opencode-zen": ["c", "b"]}),
):
zen_result = set(provider_model_ids("opencode-zen"))
assert {"a", "b", "c"} <= zen_result
def test_case_insensitive_dedup(self):
"""Dedup is case-insensitive but preserves first occurrence casing."""
@ -71,129 +119,13 @@ class TestGenericProviderLiveCuratedMerge:
with (
patch("providers.get_provider_profile", return_value=profile),
patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={"api_key": "k", "base_url": ""}),
patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}),
):
result = provider_model_ids("zai")
# Live-first: live casing wins for models present in both.
assert result == ["GLM-5.1", "glm-5", "glm-4.5"]
def test_empty_curated_returns_live_only(self):
"""When no curated list exists, live is returned as-is."""
live = ["model-a", "model-b"]
profile = self._make_profile(live)
with (
patch("providers.get_provider_profile", return_value=profile),
patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={"api_key": "k", "base_url": ""}),
patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": []}),
):
result = provider_model_ids("zai")
assert result == ["model-a", "model-b"]
def test_live_empty_falls_back_to_curated(self):
"""When live returns nothing, curated static list is used.
ZAI is in _MODELS_DEV_PREFERRED so the fallback path merges with
models.dev. We mock _merge_with_models_dev to isolate the test.
"""
curated = ["glm-5.1", "glm-5", "glm-4.5"]
profile = self._make_profile([])
with (
patch("providers.get_provider_profile", return_value=profile),
patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={"api_key": "k", "base_url": ""}),
patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}),
patch("hermes_cli.models._merge_with_models_dev", return_value=curated),
):
result = provider_model_ids("zai")
assert result == curated
class TestValidateRequestedModelCuratedFallback:
"""validate_requested_model falls back to curated catalog when live API omits model."""
def test_model_in_curated_but_not_live_is_accepted(self):
"""When live /v1/models omits a model that exists in the curated
catalog, validate_requested_model should accept it with a note.
Patches the real ``_PROVIDER_MODELS`` source (not the function under
test) so the curated-catalog fallback is genuinely exercised.
"""
from hermes_cli.models import validate_requested_model
# Live API returns only glm-5.1, but curated has glm-5.2
live_models = ["glm-5.1"]
curated = ["glm-5.2", "glm-5.1", "glm-5", "glm-4.5"]
with (
patch("hermes_cli.models.fetch_api_models", return_value=live_models),
patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}),
):
result = validate_requested_model("glm-5.2", "zai", api_key="dummy")
assert result["accepted"] is True
assert result["recognized"] is True
assert result["message"] is not None
assert "curated catalog" in result["message"]
def test_model_not_in_curated_nor_live_is_rejected(self):
"""When a model is in neither live nor curated, it should be rejected."""
from hermes_cli.models import validate_requested_model
live_models = ["glm-5.1"]
curated = ["glm-5.1", "glm-5", "glm-4.5"]
with (
patch("hermes_cli.models.fetch_api_models", return_value=live_models),
patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}),
):
result = validate_requested_model("nonexistent-model", "zai", api_key="dummy")
assert result["accepted"] is False
def test_model_in_live_is_accepted_without_curated_check(self):
"""When the model is in the live API, it should be accepted directly."""
from hermes_cli.models import validate_requested_model
live_models = ["glm-5.1", "glm-5"]
with patch("hermes_cli.models.fetch_api_models", return_value=live_models):
result = validate_requested_model("glm-5.1", "zai", api_key="dummy")
assert result["accepted"] is True
assert result["recognized"] is True
assert result["message"] is None
def test_curated_fallback_is_scoped_to_the_current_provider(self):
"""The curated fallback must not leak models across providers.
A model that lives in some OTHER provider's catalog (or only on an
aggregator like OpenRouter) must still be rejected when the current
provider neither lists it live nor ships it in its OWN curated
catalog. The fallback keys on ``_provider_keys(normalized)``, so
catalog membership is checked per-provider, never globally.
"""
from hermes_cli.models import validate_requested_model
# `some-other-model` is known to a DIFFERENT provider, not to zai.
# zai's live listing also omits it. It must be rejected.
live_models = ["glm-5.1"]
with (
patch("hermes_cli.models.fetch_api_models", return_value=live_models),
patch.dict(
"hermes_cli.models._PROVIDER_MODELS",
{"zai": ["glm-5.2", "glm-5.1"], "openrouter": ["some-other-model"]},
patch(
"hermes_cli.auth.resolve_api_key_provider_credentials",
return_value={"api_key": "k", "base_url": ""},
),
patch.dict("hermes_cli.models._PROVIDER_MODELS", {"zai": curated}),
):
result = validate_requested_model("some-other-model", "zai", api_key="dummy")
assert result["accepted"] is False, (
"A model only present in another provider's catalog must not be "
"accepted on this provider via the curated fallback."
)
result = provider_model_ids("zai")
# zai is curated-first: curated casing wins for models present in both.
assert result == ["glm-5.1", "GLM-5", "glm-4.5"]