Merge pull request #51027 from NousResearch/salvage/typed-model-routing

fix(model_switch): route typed configured models off openai-codex (#45006)
2026-06-27 11:22:03 +00:00 · 2026-06-24 19:32:35 +05:30 · 2026-06-24 19:32:35 +05:30 · 2187fd884c
commit 2187fd884c
parent ae20c3fb90 1a174dfb50
4 changed files with 518 additions and 0 deletions
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@ -662,6 +662,88 @@ def resolve_display_context_length(
    return None


+# ---------------------------------------------------------------------------
+# Configured-provider detection for typed model names
+# ---------------------------------------------------------------------------
+
+
+def _configured_provider_matches(
+    model_name: str,
+    user_providers: Optional[dict],
+    custom_providers: Optional[list],
+) -> dict[str, str]:
+    """Return ``{provider_slug: canonical_model_id}`` for every configured
+    provider whose declared models contain an exact (case-insensitive) match
+    for ``model_name``.
+
+    Used by :func:`switch_model` to route a *typed* model name to the provider
+    that actually declares it in user/custom provider config, instead of
+    leaving it on the current provider.  Without this, a model declared under
+    ``providers.<slug>`` / ``custom_providers`` but typed while the current
+    provider is ``openai-codex`` stays on Codex and is soft-accepted as an
+    unknown hidden Codex model (#45006).
+
+    Matching is exact after trimming whitespace and ignoring case; the spelling
+    from config is returned so the downstream validation/override path sees the
+    canonical id.  Only the explicitly-declared model collections are scanned
+    (``models``, the singular ``model``, ``default_model``) — never fuzzy/family.
+    """
+    if not model_name or not model_name.strip():
+        return {}
+    target = model_name.strip().lower()
+
+    def _match(value) -> Optional[str]:
+        """Canonical id if ``value`` (a model collection or scalar) declares
+        ``target``, else None."""
+        if isinstance(value, str):
+            return value if value.strip().lower() == target else None
+        if isinstance(value, dict):
+            for mid in value:
+                if isinstance(mid, str) and mid.strip().lower() == target:
+                    return mid
+            return None
+        if isinstance(value, (list, tuple)):
+            for item in value:
+                if isinstance(item, str) and item.strip().lower() == target:
+                    return item
+                if isinstance(item, dict):
+                    name = item.get("name")
+                    if isinstance(name, str) and name.strip().lower() == target:
+                        return name
+            return None
+        return None
+
+    matches: dict[str, str] = {}
+
+    if isinstance(user_providers, dict):
+        for slug, cfg in user_providers.items():
+            if not isinstance(slug, str) or not isinstance(cfg, dict):
+                continue
+            for key in ("models", "model", "default_model"):
+                hit = _match(cfg.get(key))
+                if hit:
+                    matches[slug] = hit
+                    break
+
+    if isinstance(custom_providers, list):
+        for entry in custom_providers:
+            if not isinstance(entry, dict):
+                continue
+            name = entry.get("name")
+            if not isinstance(name, str) or not name.strip():
+                continue
+            slug = f"custom:{name}"
+            if slug in matches:
+                continue
+            for key in ("models", "model", "default_model"):
+                hit = _match(entry.get(key))
+                if hit:
+                    matches[slug] = hit
+                    break
+
+    return matches
+
+
 # ---------------------------------------------------------------------------
 # Core model-switching pipeline
 # ---------------------------------------------------------------------------
@ -921,6 +1003,58 @@ def switch_model(
                                resolved_in_current_catalog = True
                                break

+        # --- Step d.5: configured-provider exact-match detection (#45006) ---
+        # If the typed model is declared in user/custom provider config, route
+        # to that provider BEFORE detect_provider_for_model() guesses from
+        # static catalogs and BEFORE the common-path validation can let a
+        # soft-accepting current provider (e.g. openai-codex) swallow the name
+        # as an unknown hidden model.  Configured matches beat static-catalog
+        # detection.  Unlike step e this is deliberately NOT gated on
+        # ``not is_custom`` — switching from a local/custom provider A to a
+        # configured provider B that declares the typed model is the point.
+        config_routed = False
+        if (
+            not resolved_alias
+            and not resolved_in_current_catalog
+            and target_provider == current_provider
+        ):
+            cfg_matches = _configured_provider_matches(
+                new_model, user_providers, custom_providers
+            )
+            if cfg_matches:
+                if current_provider in cfg_matches:
+                    # The current provider itself declares it — keep current.
+                    new_model = cfg_matches[current_provider]
+                    config_routed = True
+                else:
+                    match_slugs = sorted(cfg_matches)
+                    if len(match_slugs) > 1:
+                        return ModelSwitchResult(
+                            success=False,
+                            is_global=is_global,
+                            error_message=(
+                                f"'{new_model}' is declared by multiple configured "
+                                f"providers ({', '.join(match_slugs)}). Re-run with "
+                                f"--provider <slug> to choose which one to use."
+                            ),
+                        )
+                    target_provider = match_slugs[0]
+                    new_model = cfg_matches[target_provider]
+                    config_routed = True
+                    logger.debug(
+                        "Configured-provider detection routed '%s' to %s",
+                        new_model, target_provider,
+                    )
+                    # User-config providers (providers.<slug>) are resolved in
+                    # the credential block via resolve_user_provider(), which is
+                    # gated on explicit_provider.  Mirror the picker so the
+                    # rerouted user provider's base_url/key load from the passed
+                    # config rather than a from-scratch runtime re-resolve that
+                    # doesn't know user-config slugs.  custom:* slugs resolve via
+                    # resolve_runtime_provider() directly and need no hint.
+                    if isinstance(user_providers, dict) and target_provider in user_providers:
+                        explicit_provider = target_provider
+
        # --- Step e: detect_provider_for_model() as last resort ---
        _base = current_base_url or ""
        is_custom = current_provider in {"custom", "local"} or (
@ -932,6 +1066,7 @@ def switch_model(
            and not is_custom
            and not resolved_alias
            and not resolved_in_current_catalog
+            and not config_routed
        ):
            detected = detect_provider_for_model(new_model, current_provider)
            if detected:
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@ -3793,6 +3793,37 @@ def validate_requested_model(
            if suggestions:
                suggestion_text = "\n  Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
            provider_label = "OpenAI Codex" if normalized == "openai-codex" else "xAI Grok OAuth (SuperGrok / Premium+)"
+            # Plausibility gate (#45006): the soft-accept (#16172 / #19729) exists
+            # for entitlement-gated *hidden* slugs the curated listing hasn't
+            # caught up with — but those are always the provider's own family
+            # (openai-codex -> gpt-*; xai-oauth -> grok-*). Accepting an
+            # unrelated typed name (e.g. `qwen3.5-4b`, `llama-3.1-8b`) here turns
+            # what should be an actionable "did you mean --provider <x>?" error
+            # into a confusing success that 400s on the next turn. Only soft-
+            # accept names that share the provider's family prefix; reject the
+            # rest with guidance to pin the right provider.
+            _family_prefixes = {
+                "openai-codex": ("gpt-", "codex-", "o1", "o3", "o4"),
+                "xai-oauth": ("grok-",),
+            }.get(normalized, ())
+            _lower = requested_for_lookup.strip().lower()
+            _plausible = (not _family_prefixes) or any(
+                _lower.startswith(p) for p in _family_prefixes
+            )
+            if not _plausible:
+                return {
+                    "accepted": False,
+                    "persist": False,
+                    "recognized": False,
+                    "message": (
+                        f"`{requested}` doesn't look like a {provider_label} model "
+                        f"and isn't in its listing, so it was not accepted. If it "
+                        f"belongs to another configured provider, switch with "
+                        f"`--provider <slug>` (or select it from the `/model` "
+                        f"picker)."
+                        f"{suggestion_text}"
+                    ),
+                }
            return {
                "accepted": True,
                "persist": True,
--- a/tests/hermes_cli/test_model_switch_configured_provider_routing.py
+++ b/tests/hermes_cli/test_model_switch_configured_provider_routing.py
@ -0,0 +1,310 @@
+"""Regression tests for #45006: typed `/model <name>` resolution must route a
+model declared in user/custom provider config to that provider instead of
+leaving it on the current provider and soft-accepting it.
+
+Repro: with the current provider set to ``openai-codex``, typing
+``/model qwen3.5-4b`` (a model the user declares under ``providers.<slug>`` or
+``custom_providers``) showed ``Provider: OpenAI Codex`` — because typed
+detection only consulted static catalogs / OpenRouter, never the user's
+configured provider model lists, so the name stayed on Codex and was
+soft-accepted as an unknown hidden Codex model.
+
+The fix adds an exact-match configured-provider detection step in
+``switch_model`` that runs before ``detect_provider_for_model`` and before
+common-path validation.  These tests pin its precedence rules and prove the
+deliberately-supported Codex hidden-model soft-accept (#16172 / #19729) is left
+intact when nothing in config matches.
+
+Hermetic: the model-resolution chain is fully mocked (no network), mirroring
+``tests/hermes_cli/test_user_providers_model_switch.py``.
+"""
+
+from unittest.mock import patch
+
+from hermes_cli.model_switch import switch_model
+
+_ACCEPTED = {"accepted": True, "persist": True, "recognized": True, "message": None}
+_REJECTED = {"accepted": False, "persist": False, "recognized": False, "message": "not found"}
+# What validate_requested_model returns for an unknown, family-shaped (gpt-*)
+# id on openai-codex: a soft-accept with a hidden-model note (#16172 / #19729).
+_CODEX_SOFT_ACCEPT = {
+    "accepted": True,
+    "persist": True,
+    "recognized": False,
+    "message": (
+        "Note: `gpt-5.9-codex-hidden` was not found in the OpenAI Codex model "
+        "listing. It may still work if your account has access to a newer or "
+        "hidden model ID."
+    ),
+}
+
+
+def _run_switch(
+    *,
+    raw_input,
+    current_provider,
+    user_providers=None,
+    custom_providers=None,
+    validation=_ACCEPTED,
+    current_model="old-model",
+    current_base_url="",
+):
+    """Drive ``switch_model`` with the resolution chain mocked out.
+
+    Every external lookup that would otherwise hit catalogs/network is patched:
+    alias resolution, aggregator catalog, ``detect_provider_for_model`` (so step
+    e is a no-op and cannot accidentally reroute), validation, credential
+    resolution, normalization, and model metadata.  This isolates the new
+    configured-provider detection step.
+    """
+    with patch("hermes_cli.model_switch.resolve_alias", return_value=None), \
+         patch("hermes_cli.model_switch.list_provider_models", return_value=[]), \
+         patch("hermes_cli.model_switch.normalize_model_for_provider", side_effect=lambda model, provider: model), \
+         patch("hermes_cli.models.validate_requested_model", return_value=validation), \
+         patch("hermes_cli.models.detect_provider_for_model", return_value=None), \
+         patch("hermes_cli.model_switch.get_model_info", return_value=None), \
+         patch("hermes_cli.model_switch.get_model_capabilities", return_value=None), \
+         patch(
+             "hermes_cli.runtime_provider.resolve_runtime_provider",
+             return_value={
+                 "api_key": "***",
+                 "base_url": current_base_url or "http://resolved/v1",
+                 "api_mode": "",
+             },
+         ):
+        return switch_model(
+            raw_input=raw_input,
+            current_provider=current_provider,
+            current_model=current_model,
+            current_base_url=current_base_url,
+            user_providers=user_providers or {},
+            custom_providers=custom_providers or [],
+        )
+
+
+def test_typed_configured_model_routes_away_from_openai_codex():
+    """The core repro: a model declared under ``providers.<slug>`` typed while
+    on ``openai-codex`` routes to the configured provider, not Codex."""
+    user_providers = {
+        "local-ollama": {
+            "name": "Local Ollama",
+            "base_url": "http://localhost:11434/v1",
+            "models": ["qwen3.5-4b", "kimi-k2.5"],
+        }
+    }
+    result = _run_switch(
+        raw_input="qwen3.5-4b",
+        current_provider="openai-codex",
+        current_model="gpt-5.4",
+        user_providers=user_providers,
+    )
+    assert result.success is True, result.error_message
+    assert result.target_provider == "local-ollama"
+    assert result.new_model == "qwen3.5-4b"
+
+
+def test_typed_configured_model_routes_to_custom_provider():
+    """``custom_providers`` entries route to their ``custom:<name>`` slug."""
+    custom_providers = [
+        {
+            "name": "mylocal",
+            "base_url": "http://localhost:1234/v1",
+            "model": "qwen3.5-4b",
+            "models": {"qwen3.5-4b": {}},
+        }
+    ]
+    result = _run_switch(
+        raw_input="qwen3.5-4b",
+        current_provider="openai-codex",
+        current_model="gpt-5.4",
+        custom_providers=custom_providers,
+    )
+    assert result.success is True, result.error_message
+    assert result.target_provider == "custom:mylocal"
+    assert result.new_model == "qwen3.5-4b"
+
+
+def test_current_provider_declaring_model_is_not_rerouted():
+    """Precedence rule 4: if the current provider declares the model, keep it —
+    even when another configured provider also declares the same id (so this
+    must NOT trip the ambiguity guard)."""
+    user_providers = {
+        "local-ollama": {
+            "name": "Local Ollama",
+            "base_url": "http://localhost:11434/v1",
+            "models": ["qwen3.5-4b"],
+        },
+        "other-relay": {
+            "name": "Other Relay",
+            "base_url": "http://other/v1",
+            "models": ["qwen3.5-4b"],
+        },
+    }
+    result = _run_switch(
+        raw_input="qwen3.5-4b",
+        current_provider="local-ollama",
+        current_model="kimi-k2.5",
+        current_base_url="http://localhost:11434/v1",
+        user_providers=user_providers,
+    )
+    assert result.success is True, result.error_message
+    assert result.target_provider == "local-ollama"
+
+
+def test_ambiguous_configured_model_fails_with_provider_hint():
+    """Precedence rule 6: when two non-current providers declare the same id and
+    neither is current, fail clearly and point at ``--provider`` — never
+    silently pick the first match."""
+    user_providers = {
+        "relay-a": {
+            "name": "Relay A",
+            "base_url": "http://a/v1",
+            "models": ["qwen3.5-4b"],
+        },
+        "relay-b": {
+            "name": "Relay B",
+            "base_url": "http://b/v1",
+            "models": ["qwen3.5-4b"],
+        },
+    }
+    result = _run_switch(
+        raw_input="qwen3.5-4b",
+        current_provider="openai-codex",
+        current_model="gpt-5.4",
+        user_providers=user_providers,
+    )
+    assert result.success is False
+    assert "--provider" in result.error_message
+    assert "relay-a" in result.error_message
+    assert "relay-b" in result.error_message
+
+
+def test_configured_model_absent_from_live_models_accepted_after_reroute():
+    """End-to-end synergy: after rerouting to the configured provider, a live
+    ``/v1/models`` probe that does NOT list the model is still accepted via the
+    existing user-config override — proving the reroute lands on the right
+    provider for that override to match."""
+    user_providers = {
+        "local-ollama": {
+            "name": "Local Ollama",
+            "base_url": "http://localhost:11434/v1",
+            "models": {"qwen3.5-4b": {"context_length": 32768}},
+        }
+    }
+    result = _run_switch(
+        raw_input="qwen3.5-4b",
+        current_provider="openai-codex",
+        current_model="gpt-5.4",
+        user_providers=user_providers,
+        validation=_REJECTED,
+    )
+    assert result.success is True, result.error_message
+    assert result.target_provider == "local-ollama"
+    assert result.new_model == "qwen3.5-4b"
+
+
+def test_no_configured_match_leaves_current_provider_for_soft_accept():
+    """The Codex hidden-model soft-accept (#16172 / #19729) is untouched: an
+    unknown id with no config match stays on the current provider and is
+    soft-accepted exactly as before."""
+    result = _run_switch(
+        raw_input="gpt-5.9-codex-hidden",
+        current_provider="openai-codex",
+        current_model="gpt-5.4",
+        # Config is present but declares an unrelated model — detection is a no-op.
+        user_providers={
+            "local-ollama": {
+                "base_url": "http://localhost:11434/v1",
+                "models": ["qwen3.5-4b"],
+            }
+        },
+        validation=_CODEX_SOFT_ACCEPT,
+    )
+    assert result.success is True, result.error_message
+    assert result.target_provider == "openai-codex"
+    assert result.new_model == "gpt-5.9-codex-hidden"
+
+
+def test_configured_match_is_case_insensitive_and_returns_canonical_spelling():
+    """Matching is case-insensitive but the configured spelling wins, so the
+    downstream validation/override path sees the canonical id."""
+    user_providers = {
+        "local-ollama": {
+            "base_url": "http://localhost:11434/v1",
+            "models": ["Qwen3.5-4B"],
+        }
+    }
+    result = _run_switch(
+        raw_input="qwen3.5-4b",
+        current_provider="openai-codex",
+        current_model="gpt-5.4",
+        user_providers=user_providers,
+    )
+    assert result.success is True, result.error_message
+    assert result.target_provider == "local-ollama"
+    assert result.new_model == "Qwen3.5-4B"
+
+
+def test_default_model_only_declaration_routes():
+    """A model declared ONLY via `default_model` (not in `models`) still routes
+    to that configured provider (#45006 — default_model is a declaring field)."""
+    user_providers = {
+        "local-ollama": {
+            "name": "Local Ollama",
+            "base_url": "http://localhost:11434/v1",
+            "default_model": "qwen3.5-4b",
+        }
+    }
+    result = _run_switch(
+        raw_input="qwen3.5-4b",
+        current_provider="openai-codex",
+        current_model="gpt-5.4",
+        user_providers=user_providers,
+    )
+    assert result.success is True, result.error_message
+    assert result.target_provider == "local-ollama"
+    assert result.new_model == "qwen3.5-4b"
+
+
+def test_malformed_provider_config_does_not_raise():
+    """Garbage shapes in provider config must not crash detection — they're
+    skipped, so nothing matches and the typed name is soft-accepted as before."""
+    user_providers = {
+        "bad1": "not-a-dict",            # non-dict cfg
+        "bad2": {"models": 12345},        # models as int
+        "bad3": {"models": [None, 7, {"noname": "x"}]},  # junk list items
+        "bad4": {"model": {"k": object()}},  # dict with non-target keys
+    }
+    custom_providers = [
+        "not-a-dict",                     # non-dict entry
+        {"name": ""},                     # empty name
+        {"models": ["unrelated-model"]},  # no name key
+    ]
+    result = _run_switch(
+        raw_input="gpt-5.9-codex-hidden",
+        current_provider="openai-codex",
+        current_model="gpt-5.4",
+        user_providers=user_providers,
+        custom_providers=custom_providers,
+        validation=_CODEX_SOFT_ACCEPT,
+    )
+    # No match anywhere -> stays on codex, soft-accepted, no exception.
+    assert result.success is True, result.error_message
+    assert result.target_provider == "openai-codex"
+
+
+def test_xai_oauth_soft_accept_preserved_when_no_match():
+    """The xai-oauth hidden-model soft-accept (sibling of openai-codex) is also
+    preserved: detection is a no-op when config declares no matching model."""
+    user_providers = {
+        "local-ollama": {"base_url": "http://x/v1", "models": ["some-other-model"]},
+    }
+    result = _run_switch(
+        raw_input="grok-hidden-preview",
+        current_provider="xai-oauth",
+        current_model="grok-4",
+        user_providers=user_providers,
+        validation=_CODEX_SOFT_ACCEPT,
+    )
+    assert result.success is True, result.error_message
+    assert result.target_provider == "xai-oauth"
--- a/tests/hermes_cli/test_models.py
+++ b/tests/hermes_cli/test_models.py
@ -907,3 +907,45 @@ class TestNousRecommendedModels:
            patch("hermes_cli.models.check_nous_free_tier", side_effect=RuntimeError("boom")),
        ):
            assert get_nous_recommended_aux_model(vision=False) == "paid-model"
+
+
+class TestCodexSoftAcceptPlausibilityGate:
+    """#45006 kernel (b): the openai-codex / xai-oauth hidden-model soft-accept
+    (#16172 / #19729) must only accept slugs that plausibly belong to that
+    provider's family. An undeclared, unrelated typed name (e.g. a local model
+    name) must be REJECTED with actionable --provider guidance instead of being
+    fake-accepted as a hidden Codex/Grok model (which would 400 on the next turn
+    and mislabel the provider as 'OpenAI Codex')."""
+
+    def test_unrelated_name_rejected_on_openai_codex(self):
+        from hermes_cli.models import validate_requested_model
+        r = validate_requested_model("qwen3.5-4b", "openai-codex")
+        assert r["accepted"] is False
+        assert r["persist"] is False
+        assert "--provider" in (r["message"] or "")
+
+    def test_unrelated_name_rejected_on_xai_oauth(self):
+        from hermes_cli.models import validate_requested_model
+        r = validate_requested_model("llama-3.1-8b", "xai-oauth")
+        assert r["accepted"] is False
+        assert "--provider" in (r["message"] or "")
+
+    def test_family_shaped_hidden_slug_still_soft_accepted_codex(self):
+        """#16172 intent preserved: a gpt-/codex-shaped unknown slug is still
+        soft-accepted (entitlement-gated hidden models)."""
+        from hermes_cli.models import validate_requested_model
+        r = validate_requested_model("gpt-5.9-codex-hidden", "openai-codex")
+        assert r["accepted"] is True
+        assert r["recognized"] is False
+
+    def test_family_shaped_hidden_slug_still_soft_accepted_xai(self):
+        from hermes_cli.models import validate_requested_model
+        r = validate_requested_model("grok-9-hidden", "xai-oauth")
+        assert r["accepted"] is True
+        assert r["recognized"] is False
+
+    def test_real_catalog_model_unaffected(self):
+        from hermes_cli.models import validate_requested_model
+        r = validate_requested_model("gpt-5.5", "openai-codex")
+        assert r["accepted"] is True
+        assert r["recognized"] is True