diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 7f6fe70d90a..fdb6e9f6e8a 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -662,6 +662,88 @@ def resolve_display_context_length( return None +# --------------------------------------------------------------------------- +# Configured-provider detection for typed model names +# --------------------------------------------------------------------------- + + +def _configured_provider_matches( + model_name: str, + user_providers: Optional[dict], + custom_providers: Optional[list], +) -> dict[str, str]: + """Return ``{provider_slug: canonical_model_id}`` for every configured + provider whose declared models contain an exact (case-insensitive) match + for ``model_name``. + + Used by :func:`switch_model` to route a *typed* model name to the provider + that actually declares it in user/custom provider config, instead of + leaving it on the current provider. Without this, a model declared under + ``providers.`` / ``custom_providers`` but typed while the current + provider is ``openai-codex`` stays on Codex and is soft-accepted as an + unknown hidden Codex model (#45006). + + Matching is exact (case-insensitive); the configured spelling is returned + so the downstream validation/override path sees the canonical id. Only the + explicitly-declared model collections are scanned (``models``, the singular + ``model``, and ``default_model``) — never fuzzy/family matching. + """ + if not model_name or not model_name.strip(): + return {} + target = model_name.strip().lower() + + def _match(value) -> Optional[str]: + """Canonical id if ``value`` (a model collection or scalar) declares + ``target``, else None.""" + if isinstance(value, str): + return value if value.strip().lower() == target else None + if isinstance(value, dict): + for mid in value: + if isinstance(mid, str) and mid.strip().lower() == target: + return mid + return None + if isinstance(value, (list, tuple)): + for item in value: + if isinstance(item, str) and item.strip().lower() == target: + return item + if isinstance(item, dict): + name = item.get("name") + if isinstance(name, str) and name.strip().lower() == target: + return name + return None + return None + + matches: dict[str, str] = {} + + if isinstance(user_providers, dict): + for slug, cfg in user_providers.items(): + if not isinstance(slug, str) or not isinstance(cfg, dict): + continue + for key in ("models", "model", "default_model"): + hit = _match(cfg.get(key)) + if hit: + matches[slug] = hit + break + + if isinstance(custom_providers, list): + for entry in custom_providers: + if not isinstance(entry, dict): + continue + name = entry.get("name") + if not isinstance(name, str) or not name.strip(): + continue + slug = f"custom:{name}" + if slug in matches: + continue + for key in ("models", "model", "default_model"): + hit = _match(entry.get(key)) + if hit: + matches[slug] = hit + break + + return matches + + # --------------------------------------------------------------------------- # Core model-switching pipeline # --------------------------------------------------------------------------- @@ -921,6 +1003,58 @@ def switch_model( resolved_in_current_catalog = True break + # --- Step d.5: configured-provider exact-match detection (#45006) --- + # If the typed model is declared in user/custom provider config, route + # to that provider BEFORE detect_provider_for_model() guesses from + # static catalogs and BEFORE the common-path validation can let a + # soft-accepting current provider (e.g. openai-codex) swallow the name + # as an unknown hidden model. Configured matches beat static-catalog + # detection. Unlike step e this is deliberately NOT gated on + # ``not is_custom`` — switching from a local/custom provider A to a + # configured provider B that declares the typed model is the point. + config_routed = False + if ( + not resolved_alias + and not resolved_in_current_catalog + and target_provider == current_provider + ): + cfg_matches = _configured_provider_matches( + new_model, user_providers, custom_providers + ) + if cfg_matches: + if current_provider in cfg_matches: + # The current provider itself declares it — keep current. + new_model = cfg_matches[current_provider] + config_routed = True + else: + match_slugs = sorted(cfg_matches) + if len(match_slugs) > 1: + return ModelSwitchResult( + success=False, + is_global=is_global, + error_message=( + f"'{new_model}' is declared by multiple configured " + f"providers ({', '.join(match_slugs)}). Re-run with " + f"--provider to choose which one to use." + ), + ) + target_provider = match_slugs[0] + new_model = cfg_matches[target_provider] + config_routed = True + logger.debug( + "Configured-provider detection routed '%s' to %s", + new_model, target_provider, + ) + # User-config providers (providers.) are resolved in + # the credential block via resolve_user_provider(), which is + # gated on explicit_provider. Mirror the picker so the + # rerouted user provider's base_url/key load from the passed + # config rather than a from-scratch runtime re-resolve that + # doesn't know user-config slugs. custom:* slugs resolve via + # resolve_runtime_provider() directly and need no hint. + if isinstance(user_providers, dict) and target_provider in user_providers: + explicit_provider = target_provider + # --- Step e: detect_provider_for_model() as last resort --- _base = current_base_url or "" is_custom = current_provider in {"custom", "local"} or ( @@ -932,6 +1066,7 @@ def switch_model( and not is_custom and not resolved_alias and not resolved_in_current_catalog + and not config_routed ): detected = detect_provider_for_model(new_model, current_provider) if detected: diff --git a/tests/hermes_cli/test_model_switch_configured_provider_routing.py b/tests/hermes_cli/test_model_switch_configured_provider_routing.py new file mode 100644 index 00000000000..361aa55f706 --- /dev/null +++ b/tests/hermes_cli/test_model_switch_configured_provider_routing.py @@ -0,0 +1,310 @@ +"""Regression tests for #45006: typed `/model ` resolution must route a +model declared in user/custom provider config to that provider instead of +leaving it on the current provider and soft-accepting it. + +Repro: with the current provider set to ``openai-codex``, typing +``/model qwen3.5-4b`` (a model the user declares under ``providers.`` or +``custom_providers``) showed ``Provider: OpenAI Codex`` — because typed +detection only consulted static catalogs / OpenRouter, never the user's +configured provider model lists, so the name stayed on Codex and was +soft-accepted as an unknown hidden Codex model. + +The fix adds an exact-match configured-provider detection step in +``switch_model`` that runs before ``detect_provider_for_model`` and before +common-path validation. These tests pin its precedence rules and prove the +deliberately-supported Codex hidden-model soft-accept (#16172 / #19729) is left +intact when nothing in config matches. + +Hermetic: the model-resolution chain is fully mocked (no network), mirroring +``tests/hermes_cli/test_user_providers_model_switch.py``. +""" + +from unittest.mock import patch + +from hermes_cli.model_switch import switch_model + +_ACCEPTED = {"accepted": True, "persist": True, "recognized": True, "message": None} +_REJECTED = {"accepted": False, "persist": False, "recognized": False, "message": "not found"} +# What validate_requested_model returns for an unknown id on openai-codex: it +# soft-accepts with a "may be a hidden model" note (#16172 / #19729). +_CODEX_SOFT_ACCEPT = { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + "Note: `gpt-5.9-codex-hidden` was not found in the OpenAI Codex model " + "listing. It may still work if your account has access to a newer or " + "hidden model ID." + ), +} + + +def _run_switch( + *, + raw_input, + current_provider, + user_providers=None, + custom_providers=None, + validation=_ACCEPTED, + current_model="old-model", + current_base_url="", +): + """Drive ``switch_model`` with the resolution chain mocked out. + + Every external lookup that would otherwise hit catalogs/network is patched: + alias resolution, aggregator catalog, ``detect_provider_for_model`` (so step + e is a no-op and cannot accidentally reroute), validation, credential + resolution, normalization, and model metadata. This isolates the new + configured-provider detection step. + """ + with patch("hermes_cli.model_switch.resolve_alias", return_value=None), \ + patch("hermes_cli.model_switch.list_provider_models", return_value=[]), \ + patch("hermes_cli.model_switch.normalize_model_for_provider", side_effect=lambda model, provider: model), \ + patch("hermes_cli.models.validate_requested_model", return_value=validation), \ + patch("hermes_cli.models.detect_provider_for_model", return_value=None), \ + patch("hermes_cli.model_switch.get_model_info", return_value=None), \ + patch("hermes_cli.model_switch.get_model_capabilities", return_value=None), \ + patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + return_value={ + "api_key": "***", + "base_url": current_base_url or "http://resolved/v1", + "api_mode": "", + }, + ): + return switch_model( + raw_input=raw_input, + current_provider=current_provider, + current_model=current_model, + current_base_url=current_base_url, + user_providers=user_providers or {}, + custom_providers=custom_providers or [], + ) + + +def test_typed_configured_model_routes_away_from_openai_codex(): + """The core repro: a model declared under ``providers.`` typed while + on ``openai-codex`` routes to the configured provider, not Codex.""" + user_providers = { + "local-ollama": { + "name": "Local Ollama", + "base_url": "http://localhost:11434/v1", + "models": ["qwen3.5-4b", "kimi-k2.5"], + } + } + result = _run_switch( + raw_input="qwen3.5-4b", + current_provider="openai-codex", + current_model="gpt-5.4", + user_providers=user_providers, + ) + assert result.success is True, result.error_message + assert result.target_provider == "local-ollama" + assert result.new_model == "qwen3.5-4b" + + +def test_typed_configured_model_routes_to_custom_provider(): + """``custom_providers`` entries route to their ``custom:`` slug.""" + custom_providers = [ + { + "name": "mylocal", + "base_url": "http://localhost:1234/v1", + "model": "qwen3.5-4b", + "models": {"qwen3.5-4b": {}}, + } + ] + result = _run_switch( + raw_input="qwen3.5-4b", + current_provider="openai-codex", + current_model="gpt-5.4", + custom_providers=custom_providers, + ) + assert result.success is True, result.error_message + assert result.target_provider == "custom:mylocal" + assert result.new_model == "qwen3.5-4b" + + +def test_current_provider_declaring_model_is_not_rerouted(): + """Precedence rule 4: if the current provider declares the model, keep it — + even when another configured provider also declares the same id (so this + must NOT trip the ambiguity guard).""" + user_providers = { + "local-ollama": { + "name": "Local Ollama", + "base_url": "http://localhost:11434/v1", + "models": ["qwen3.5-4b"], + }, + "other-relay": { + "name": "Other Relay", + "base_url": "http://other/v1", + "models": ["qwen3.5-4b"], + }, + } + result = _run_switch( + raw_input="qwen3.5-4b", + current_provider="local-ollama", + current_model="kimi-k2.5", + current_base_url="http://localhost:11434/v1", + user_providers=user_providers, + ) + assert result.success is True, result.error_message + assert result.target_provider == "local-ollama" + + +def test_ambiguous_configured_model_fails_with_provider_hint(): + """Precedence rule 6: when two non-current providers declare the same id and + neither is current, fail clearly and point at ``--provider`` — never + silently pick the first match.""" + user_providers = { + "relay-a": { + "name": "Relay A", + "base_url": "http://a/v1", + "models": ["qwen3.5-4b"], + }, + "relay-b": { + "name": "Relay B", + "base_url": "http://b/v1", + "models": ["qwen3.5-4b"], + }, + } + result = _run_switch( + raw_input="qwen3.5-4b", + current_provider="openai-codex", + current_model="gpt-5.4", + user_providers=user_providers, + ) + assert result.success is False + assert "--provider" in result.error_message + assert "relay-a" in result.error_message + assert "relay-b" in result.error_message + + +def test_configured_model_absent_from_live_models_accepted_after_reroute(): + """End-to-end synergy: after rerouting to the configured provider, a live + ``/v1/models`` probe that does NOT list the model is still accepted via the + existing user-config override — proving the reroute lands on the right + provider for that override to match.""" + user_providers = { + "local-ollama": { + "name": "Local Ollama", + "base_url": "http://localhost:11434/v1", + "models": {"qwen3.5-4b": {"context_length": 32768}}, + } + } + result = _run_switch( + raw_input="qwen3.5-4b", + current_provider="openai-codex", + current_model="gpt-5.4", + user_providers=user_providers, + validation=_REJECTED, + ) + assert result.success is True, result.error_message + assert result.target_provider == "local-ollama" + assert result.new_model == "qwen3.5-4b" + + +def test_no_configured_match_leaves_current_provider_for_soft_accept(): + """The Codex hidden-model soft-accept (#16172 / #19729) is untouched: an + unknown id with no config match stays on the current provider and is + soft-accepted exactly as before.""" + result = _run_switch( + raw_input="gpt-5.9-codex-hidden", + current_provider="openai-codex", + current_model="gpt-5.4", + # Config is present but declares an unrelated model — detection is a no-op. + user_providers={ + "local-ollama": { + "base_url": "http://localhost:11434/v1", + "models": ["qwen3.5-4b"], + } + }, + validation=_CODEX_SOFT_ACCEPT, + ) + assert result.success is True, result.error_message + assert result.target_provider == "openai-codex" + assert result.new_model == "gpt-5.9-codex-hidden" + + +def test_configured_match_is_case_insensitive_and_returns_canonical_spelling(): + """Matching is case-insensitive but the configured spelling wins, so the + downstream validation/override path sees the canonical id.""" + user_providers = { + "local-ollama": { + "base_url": "http://localhost:11434/v1", + "models": ["Qwen3.5-4B"], + } + } + result = _run_switch( + raw_input="qwen3.5-4b", + current_provider="openai-codex", + current_model="gpt-5.4", + user_providers=user_providers, + ) + assert result.success is True, result.error_message + assert result.target_provider == "local-ollama" + assert result.new_model == "Qwen3.5-4B" + + +def test_default_model_only_declaration_routes(): + """A model declared ONLY via `default_model` (not in `models`) still routes + to that configured provider (#45006 — default_model is a declaring field).""" + user_providers = { + "local-ollama": { + "name": "Local Ollama", + "base_url": "http://localhost:11434/v1", + "default_model": "qwen3.5-4b", + } + } + result = _run_switch( + raw_input="qwen3.5-4b", + current_provider="openai-codex", + current_model="gpt-5.4", + user_providers=user_providers, + ) + assert result.success is True, result.error_message + assert result.target_provider == "local-ollama" + assert result.new_model == "qwen3.5-4b" + + +def test_malformed_provider_config_does_not_raise(): + """Garbage shapes in provider config must not crash detection — they're + skipped and the typed name falls through to the soft-accept no-op.""" + user_providers = { + "bad1": "not-a-dict", # non-dict cfg + "bad2": {"models": 12345}, # models as int + "bad3": {"models": [None, 7, {"noname": "x"}]}, # junk list items + "bad4": {"model": {"k": object()}}, # dict with non-target keys + } + custom_providers = [ + "not-a-dict", # non-dict entry + {"name": ""}, # empty name + {"models": ["unrelated-model"]}, # no name key + ] + result = _run_switch( + raw_input="gpt-5.9-codex-hidden", + current_provider="openai-codex", + current_model="gpt-5.4", + user_providers=user_providers, + custom_providers=custom_providers, + validation=_CODEX_SOFT_ACCEPT, + ) + # No match anywhere -> stays on codex, soft-accepted, no exception. + assert result.success is True, result.error_message + assert result.target_provider == "openai-codex" + + +def test_xai_oauth_soft_accept_preserved_when_no_match(): + """The xai-oauth hidden-model soft-accept (sibling of openai-codex) is also + a no-op when config declares no matching model.""" + user_providers = { + "local-ollama": {"base_url": "http://x/v1", "models": ["some-other-model"]}, + } + result = _run_switch( + raw_input="grok-hidden-preview", + current_provider="xai-oauth", + current_model="grok-4", + user_providers=user_providers, + validation=_CODEX_SOFT_ACCEPT, + ) + assert result.success is True, result.error_message + assert result.target_provider == "xai-oauth"