diff --git a/hermes_cli/models.py b/hermes_cli/models.py index ae54217952..33614d4263 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -2426,13 +2426,70 @@ def validate_requested_model( except Exception: pass # Fall through to generic warning + # Static-catalog fallback: when the /models probe was unreachable, + # validate against the curated list from provider_model_ids() — same + # pattern as the openai-codex and minimax branches above. This fixes + # /model switches in the gateway for providers like opencode-go and + # opencode-zen whose /models endpoint returns 404 against the HTML + # marketing site. Without this block, validate_requested_model would + # reject every model on such providers, switch_model() would return + # success=False, and the gateway would never write to + # _session_model_overrides. provider_label = _PROVIDER_LABELS.get(normalized, normalized) + try: + catalog_models = provider_model_ids(normalized) + except Exception: + catalog_models = [] + + if catalog_models: + catalog_lower = {m.lower(): m for m in catalog_models} + if requested_for_lookup.lower() in catalog_lower: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + catalog_lower_list = list(catalog_lower.keys()) + auto = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9 + ) + if auto: + corrected = catalog_lower[auto[0]] + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": corrected, + "message": f"Auto-corrected `{requested}` → `{corrected}`", + } + suggestions = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5 + ) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join( + f"`{catalog_lower[s]}`" for s in suggestions + ) + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: `{requested}` was not found in the {provider_label} curated catalog " + f"and the /models endpoint was unreachable.{suggestion_text}" + f"\n The model may still work if it exists on the provider." + ), + } + + # No catalog available — accept with a warning, matching the comment's + # stated intent ("Accept and persist, but warn"). return { - "accepted": False, - "persist": False, + "accepted": True, + "persist": True, "recognized": False, "message": ( - f"Could not reach the {provider_label} API to validate `{requested}`. " + f"Note: could not reach the {provider_label} API to validate `{requested}`. " f"If the service isn't down, this model may not be valid." ), } diff --git a/tests/hermes_cli/test_model_validation.py b/tests/hermes_cli/test_model_validation.py index 65405d909f..72ffc5216d 100644 --- a/tests/hermes_cli/test_model_validation.py +++ b/tests/hermes_cli/test_model_validation.py @@ -457,29 +457,62 @@ class TestValidateApiNotFound: assert "not found" in result["message"] -# -- validate — API unreachable — reject with guidance ---------------- +# -- validate — API unreachable — soft-accept via catalog or warning -------- class TestValidateApiFallback: - def test_any_model_rejected_when_api_down(self): - result = _validate("anthropic/claude-opus-4.6", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False + """When /models is unreachable, the validator must accept the model (with + a warning) rather than reject it outright — otherwise provider switches + fail in the gateway for any provider whose /models endpoint is down or + doesn't exist (e.g. opencode-go returns 404 HTML). - def test_unknown_model_also_rejected_when_api_down(self): - result = _validate("anthropic/claude-next-gen", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False - assert "could not reach" in result["message"].lower() + Two paths: + 1. Provider has a curated catalog (``_PROVIDER_MODELS`` / live fetch): + validate against it (recognized=True for known models, + recognized=False with 'Note:' for unknown). + 2. Provider has no catalog: accept with a generic 'Note:' warning. - def test_zai_model_rejected_when_api_down(self): + In both cases ``accepted`` and ``persist`` must be True so the gateway can + write the ``_session_model_overrides`` entry. + """ + + def test_known_model_accepted_via_catalog_when_api_down(self): + # Force the openrouter catalog lookup to return a deterministic list. + with patch( + "hermes_cli.models.provider_model_ids", + return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"], + ): + result = _validate("anthropic/claude-opus-4.6", api_models=None) + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is True + + def test_unknown_model_accepted_with_note_when_api_down(self): + with patch( + "hermes_cli.models.provider_model_ids", + return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"], + ): + result = _validate("anthropic/claude-next-gen", api_models=None) + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + # Message flags it as unverified against the catalog. + assert "not found" in result["message"].lower() or "note" in result["message"].lower() + + def test_zai_known_model_accepted_via_catalog_when_api_down(self): + # glm-5 is in the zai curated catalog (_PROVIDER_MODELS["zai"]). result = _validate("glm-5", provider="zai", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is True - def test_unknown_provider_rejected_when_api_down(self): - result = _validate("some-model", provider="totally-unknown", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False + def test_unknown_provider_soft_accepted_when_api_down(self): + # No catalog for unknown providers — soft-accept with a Note. + with patch("hermes_cli.models.provider_model_ids", return_value=[]): + result = _validate("some-model", provider="totally-unknown", api_models=None) + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + assert "note" in result["message"].lower() def test_custom_endpoint_warns_with_probed_url_and_v1_hint(self): with patch( diff --git a/tests/hermes_cli/test_opencode_go_validation_fallback.py b/tests/hermes_cli/test_opencode_go_validation_fallback.py new file mode 100644 index 0000000000..f0ae76098e --- /dev/null +++ b/tests/hermes_cli/test_opencode_go_validation_fallback.py @@ -0,0 +1,133 @@ +"""Tests for the static-catalog fallback in validate_requested_model. + +OpenCode Go and OpenCode Zen publish an OpenAI-compatible API at paths that do +NOT expose ``/models`` (the path returns the marketing site's HTML 404). This +caused ``validate_requested_model`` to return ``accepted=False`` for every +model on those providers, which in turn made ``switch_model()`` fail and the +gateway's ``/model --provider opencode-go`` command never write to +``_session_model_overrides``. + +These tests cover the catalog-fallback path: when ``fetch_api_models`` returns +``None``, the validator must consult ``provider_model_ids()`` for the provider +(populated from ``_PROVIDER_MODELS``) rather than rejecting outright. +""" + +from unittest.mock import patch + +from hermes_cli.models import validate_requested_model + + +_UNREACHABLE_PROBE = { + "models": None, + "probed_url": "https://opencode.ai/zen/go/v1/models", + "resolved_base_url": "https://opencode.ai/zen/go/v1", + "suggested_base_url": None, + "used_fallback": False, +} + + +def _patched(func): + """Decorator: force fetch_api_models / probe_api_models to simulate an + unreachable /models endpoint, proving the catalog path is used.""" + def wrapper(*args, **kwargs): + with patch("hermes_cli.models.fetch_api_models", return_value=None), \ + patch("hermes_cli.models.probe_api_models", return_value=_UNREACHABLE_PROBE): + return func(*args, **kwargs) + wrapper.__name__ = func.__name__ + return wrapper + + +# --------------------------------------------------------------------------- +# opencode-go: curated catalog in _PROVIDER_MODELS +# --------------------------------------------------------------------------- + + +@_patched +def test_opencode_go_known_model_accepted(): + """A model present in the opencode-go curated catalog must be accepted + even when /models is unreachable.""" + result = validate_requested_model("kimi-k2.6", "opencode-go") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is True + assert result["message"] is None + + +@_patched +def test_opencode_go_known_model_case_insensitive(): + """Catalog lookup is case-insensitive.""" + result = validate_requested_model("KIMI-K2.6", "opencode-go") + assert result["accepted"] is True + assert result["recognized"] is True + + +@_patched +def test_opencode_go_typo_auto_corrected(): + """A close typo (>= 0.9 similarity) is auto-corrected to the catalog + entry.""" + # 'kimi-k2.55' vs 'kimi-k2.5' ratio ≈ 0.95 — within the 0.9 cutoff. + result = validate_requested_model("kimi-k2.55", "opencode-go") + assert result["accepted"] is True + assert result["recognized"] is True + assert result.get("corrected_model") == "kimi-k2.5" + + +@_patched +def test_opencode_go_unknown_model_accepted_with_suggestion(): + """An unknown model that has a medium-similarity match (>= 0.5 but < 0.9) + is accepted with recognized=False and a 'similar models' hint. The key + invariant: the gateway MUST be able to persist this override, so + accepted/persist must both be True.""" + # 'kimi-k3-preview' vs 'kimi-k2.6' — similar enough to suggest, not to auto-correct. + result = validate_requested_model("kimi-k3-preview", "opencode-go") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + assert "kimi-k3-preview" in result["message"] + assert "curated catalog" in result["message"] + + +@_patched +def test_opencode_go_totally_unknown_model_still_accepted(): + """A model with zero similarity to the catalog is still accepted (no + suggestion line) so the user can try a model that hasn't made it into the + curated list yet.""" + result = validate_requested_model("some-brand-new-model", "opencode-go") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + # No suggestion text (no close matches) + assert "Similar models" not in result["message"] + assert "opencode" in result["message"].lower() or "opencode go" in result["message"].lower() + + +# --------------------------------------------------------------------------- +# opencode-zen: same pattern as opencode-go +# --------------------------------------------------------------------------- + + +@_patched +def test_opencode_zen_known_model_accepted(): + """opencode-zen also uses _PROVIDER_MODELS; kimi-k2 is in its catalog.""" + result = validate_requested_model("kimi-k2", "opencode-zen") + assert result["accepted"] is True + assert result["recognized"] is True + + +# --------------------------------------------------------------------------- +# Unknown provider with no catalog: soft-accept (honors the comment's intent) +# --------------------------------------------------------------------------- + + +@_patched +def test_provider_without_catalog_accepts_with_warning(): + """When a provider has no entry in _PROVIDER_MODELS and /models is + unreachable, accept the model with a 'Note:' warning rather than reject. + This matches the in-code comment: 'Accept and persist, but warn so typos + don't silently break things.'""" + # Use a made-up provider name that won't resolve to any catalog. + result = validate_requested_model("some-model", "provider-that-does-not-exist") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + assert "Note:" in result["message"]