Merge pull request #51027 from NousResearch/salvage/typed-model-routing

fix(model_switch): route typed configured models off openai-codex (#45006)
This commit is contained in:
kshitij 2026-06-24 19:32:35 +05:30 committed by GitHub
commit 2187fd884c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 518 additions and 0 deletions

View file

@ -662,6 +662,88 @@ def resolve_display_context_length(
return None
# ---------------------------------------------------------------------------
# Configured-provider detection for typed model names
# ---------------------------------------------------------------------------
def _configured_provider_matches(
model_name: str,
user_providers: Optional[dict],
custom_providers: Optional[list],
) -> dict[str, str]:
"""Return ``{provider_slug: canonical_model_id}`` for every configured
provider whose declared models contain an exact (case-insensitive) match
for ``model_name``.
Used by :func:`switch_model` to route a *typed* model name to the provider
that actually declares it in user/custom provider config, instead of
leaving it on the current provider. Without this, a model declared under
``providers.<slug>`` / ``custom_providers`` but typed while the current
provider is ``openai-codex`` stays on Codex and is soft-accepted as an
unknown hidden Codex model (#45006).
Matching is exact (case-insensitive); the configured spelling is returned
so the downstream validation/override path sees the canonical id. Only the
explicitly-declared model collections are scanned (``models``, the singular
``model``, and ``default_model``) never fuzzy/family matching.
"""
if not model_name or not model_name.strip():
return {}
target = model_name.strip().lower()
def _match(value) -> Optional[str]:
"""Canonical id if ``value`` (a model collection or scalar) declares
``target``, else None."""
if isinstance(value, str):
return value if value.strip().lower() == target else None
if isinstance(value, dict):
for mid in value:
if isinstance(mid, str) and mid.strip().lower() == target:
return mid
return None
if isinstance(value, (list, tuple)):
for item in value:
if isinstance(item, str) and item.strip().lower() == target:
return item
if isinstance(item, dict):
name = item.get("name")
if isinstance(name, str) and name.strip().lower() == target:
return name
return None
return None
matches: dict[str, str] = {}
if isinstance(user_providers, dict):
for slug, cfg in user_providers.items():
if not isinstance(slug, str) or not isinstance(cfg, dict):
continue
for key in ("models", "model", "default_model"):
hit = _match(cfg.get(key))
if hit:
matches[slug] = hit
break
if isinstance(custom_providers, list):
for entry in custom_providers:
if not isinstance(entry, dict):
continue
name = entry.get("name")
if not isinstance(name, str) or not name.strip():
continue
slug = f"custom:{name}"
if slug in matches:
continue
for key in ("models", "model", "default_model"):
hit = _match(entry.get(key))
if hit:
matches[slug] = hit
break
return matches
# ---------------------------------------------------------------------------
# Core model-switching pipeline
# ---------------------------------------------------------------------------
@ -921,6 +1003,58 @@ def switch_model(
resolved_in_current_catalog = True
break
# --- Step d.5: configured-provider exact-match detection (#45006) ---
# If the typed model is declared in user/custom provider config, route
# to that provider BEFORE detect_provider_for_model() guesses from
# static catalogs and BEFORE the common-path validation can let a
# soft-accepting current provider (e.g. openai-codex) swallow the name
# as an unknown hidden model. Configured matches beat static-catalog
# detection. Unlike step e this is deliberately NOT gated on
# ``not is_custom`` — switching from a local/custom provider A to a
# configured provider B that declares the typed model is the point.
config_routed = False
if (
not resolved_alias
and not resolved_in_current_catalog
and target_provider == current_provider
):
cfg_matches = _configured_provider_matches(
new_model, user_providers, custom_providers
)
if cfg_matches:
if current_provider in cfg_matches:
# The current provider itself declares it — keep current.
new_model = cfg_matches[current_provider]
config_routed = True
else:
match_slugs = sorted(cfg_matches)
if len(match_slugs) > 1:
return ModelSwitchResult(
success=False,
is_global=is_global,
error_message=(
f"'{new_model}' is declared by multiple configured "
f"providers ({', '.join(match_slugs)}). Re-run with "
f"--provider <slug> to choose which one to use."
),
)
target_provider = match_slugs[0]
new_model = cfg_matches[target_provider]
config_routed = True
logger.debug(
"Configured-provider detection routed '%s' to %s",
new_model, target_provider,
)
# User-config providers (providers.<slug>) are resolved in
# the credential block via resolve_user_provider(), which is
# gated on explicit_provider. Mirror the picker so the
# rerouted user provider's base_url/key load from the passed
# config rather than a from-scratch runtime re-resolve that
# doesn't know user-config slugs. custom:* slugs resolve via
# resolve_runtime_provider() directly and need no hint.
if isinstance(user_providers, dict) and target_provider in user_providers:
explicit_provider = target_provider
# --- Step e: detect_provider_for_model() as last resort ---
_base = current_base_url or ""
is_custom = current_provider in {"custom", "local"} or (
@ -932,6 +1066,7 @@ def switch_model(
and not is_custom
and not resolved_alias
and not resolved_in_current_catalog
and not config_routed
):
detected = detect_provider_for_model(new_model, current_provider)
if detected:

View file

@ -3793,6 +3793,37 @@ def validate_requested_model(
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
provider_label = "OpenAI Codex" if normalized == "openai-codex" else "xAI Grok OAuth (SuperGrok / Premium+)"
# Plausibility gate (#45006): the soft-accept (#16172 / #19729) exists
# for entitlement-gated *hidden* slugs the curated listing hasn't
# caught up with — but those are always the provider's own family
# (openai-codex -> gpt-*; xai-oauth -> grok-*). Accepting an
# unrelated typed name (e.g. `qwen3.5-4b`, `llama-3.1-8b`) here turns
# what should be an actionable "did you mean --provider <x>?" error
# into a confusing success that 400s on the next turn. Only soft-
# accept names that share the provider's family prefix; reject the
# rest with guidance to pin the right provider.
_family_prefixes = {
"openai-codex": ("gpt-", "codex-", "o1", "o3", "o4"),
"xai-oauth": ("grok-",),
}.get(normalized, ())
_lower = requested_for_lookup.strip().lower()
_plausible = (not _family_prefixes) or any(
_lower.startswith(p) for p in _family_prefixes
)
if not _plausible:
return {
"accepted": False,
"persist": False,
"recognized": False,
"message": (
f"`{requested}` doesn't look like a {provider_label} model "
f"and isn't in its listing, so it was not accepted. If it "
f"belongs to another configured provider, switch with "
f"`--provider <slug>` (or select it from the `/model` "
f"picker)."
f"{suggestion_text}"
),
}
return {
"accepted": True,
"persist": True,

View file

@ -0,0 +1,310 @@
"""Regression tests for #45006: typed `/model <name>` resolution must route a
model declared in user/custom provider config to that provider instead of
leaving it on the current provider and soft-accepting it.
Repro: with the current provider set to ``openai-codex``, typing
``/model qwen3.5-4b`` (a model the user declares under ``providers.<slug>`` or
``custom_providers``) showed ``Provider: OpenAI Codex`` because typed
detection only consulted static catalogs / OpenRouter, never the user's
configured provider model lists, so the name stayed on Codex and was
soft-accepted as an unknown hidden Codex model.
The fix adds an exact-match configured-provider detection step in
``switch_model`` that runs before ``detect_provider_for_model`` and before
common-path validation. These tests pin its precedence rules and prove the
deliberately-supported Codex hidden-model soft-accept (#16172 / #19729) is left
intact when nothing in config matches.
Hermetic: the model-resolution chain is fully mocked (no network), mirroring
``tests/hermes_cli/test_user_providers_model_switch.py``.
"""
from unittest.mock import patch
from hermes_cli.model_switch import switch_model
_ACCEPTED = {"accepted": True, "persist": True, "recognized": True, "message": None}
_REJECTED = {"accepted": False, "persist": False, "recognized": False, "message": "not found"}
# What validate_requested_model returns for an unknown id on openai-codex: it
# soft-accepts with a "may be a hidden model" note (#16172 / #19729).
_CODEX_SOFT_ACCEPT = {
"accepted": True,
"persist": True,
"recognized": False,
"message": (
"Note: `gpt-5.9-codex-hidden` was not found in the OpenAI Codex model "
"listing. It may still work if your account has access to a newer or "
"hidden model ID."
),
}
def _run_switch(
*,
raw_input,
current_provider,
user_providers=None,
custom_providers=None,
validation=_ACCEPTED,
current_model="old-model",
current_base_url="",
):
"""Drive ``switch_model`` with the resolution chain mocked out.
Every external lookup that would otherwise hit catalogs/network is patched:
alias resolution, aggregator catalog, ``detect_provider_for_model`` (so step
e is a no-op and cannot accidentally reroute), validation, credential
resolution, normalization, and model metadata. This isolates the new
configured-provider detection step.
"""
with patch("hermes_cli.model_switch.resolve_alias", return_value=None), \
patch("hermes_cli.model_switch.list_provider_models", return_value=[]), \
patch("hermes_cli.model_switch.normalize_model_for_provider", side_effect=lambda model, provider: model), \
patch("hermes_cli.models.validate_requested_model", return_value=validation), \
patch("hermes_cli.models.detect_provider_for_model", return_value=None), \
patch("hermes_cli.model_switch.get_model_info", return_value=None), \
patch("hermes_cli.model_switch.get_model_capabilities", return_value=None), \
patch(
"hermes_cli.runtime_provider.resolve_runtime_provider",
return_value={
"api_key": "***",
"base_url": current_base_url or "http://resolved/v1",
"api_mode": "",
},
):
return switch_model(
raw_input=raw_input,
current_provider=current_provider,
current_model=current_model,
current_base_url=current_base_url,
user_providers=user_providers or {},
custom_providers=custom_providers or [],
)
def test_typed_configured_model_routes_away_from_openai_codex():
"""The core repro: a model declared under ``providers.<slug>`` typed while
on ``openai-codex`` routes to the configured provider, not Codex."""
user_providers = {
"local-ollama": {
"name": "Local Ollama",
"base_url": "http://localhost:11434/v1",
"models": ["qwen3.5-4b", "kimi-k2.5"],
}
}
result = _run_switch(
raw_input="qwen3.5-4b",
current_provider="openai-codex",
current_model="gpt-5.4",
user_providers=user_providers,
)
assert result.success is True, result.error_message
assert result.target_provider == "local-ollama"
assert result.new_model == "qwen3.5-4b"
def test_typed_configured_model_routes_to_custom_provider():
"""``custom_providers`` entries route to their ``custom:<name>`` slug."""
custom_providers = [
{
"name": "mylocal",
"base_url": "http://localhost:1234/v1",
"model": "qwen3.5-4b",
"models": {"qwen3.5-4b": {}},
}
]
result = _run_switch(
raw_input="qwen3.5-4b",
current_provider="openai-codex",
current_model="gpt-5.4",
custom_providers=custom_providers,
)
assert result.success is True, result.error_message
assert result.target_provider == "custom:mylocal"
assert result.new_model == "qwen3.5-4b"
def test_current_provider_declaring_model_is_not_rerouted():
"""Precedence rule 4: if the current provider declares the model, keep it —
even when another configured provider also declares the same id (so this
must NOT trip the ambiguity guard)."""
user_providers = {
"local-ollama": {
"name": "Local Ollama",
"base_url": "http://localhost:11434/v1",
"models": ["qwen3.5-4b"],
},
"other-relay": {
"name": "Other Relay",
"base_url": "http://other/v1",
"models": ["qwen3.5-4b"],
},
}
result = _run_switch(
raw_input="qwen3.5-4b",
current_provider="local-ollama",
current_model="kimi-k2.5",
current_base_url="http://localhost:11434/v1",
user_providers=user_providers,
)
assert result.success is True, result.error_message
assert result.target_provider == "local-ollama"
def test_ambiguous_configured_model_fails_with_provider_hint():
"""Precedence rule 6: when two non-current providers declare the same id and
neither is current, fail clearly and point at ``--provider`` never
silently pick the first match."""
user_providers = {
"relay-a": {
"name": "Relay A",
"base_url": "http://a/v1",
"models": ["qwen3.5-4b"],
},
"relay-b": {
"name": "Relay B",
"base_url": "http://b/v1",
"models": ["qwen3.5-4b"],
},
}
result = _run_switch(
raw_input="qwen3.5-4b",
current_provider="openai-codex",
current_model="gpt-5.4",
user_providers=user_providers,
)
assert result.success is False
assert "--provider" in result.error_message
assert "relay-a" in result.error_message
assert "relay-b" in result.error_message
def test_configured_model_absent_from_live_models_accepted_after_reroute():
"""End-to-end synergy: after rerouting to the configured provider, a live
``/v1/models`` probe that does NOT list the model is still accepted via the
existing user-config override proving the reroute lands on the right
provider for that override to match."""
user_providers = {
"local-ollama": {
"name": "Local Ollama",
"base_url": "http://localhost:11434/v1",
"models": {"qwen3.5-4b": {"context_length": 32768}},
}
}
result = _run_switch(
raw_input="qwen3.5-4b",
current_provider="openai-codex",
current_model="gpt-5.4",
user_providers=user_providers,
validation=_REJECTED,
)
assert result.success is True, result.error_message
assert result.target_provider == "local-ollama"
assert result.new_model == "qwen3.5-4b"
def test_no_configured_match_leaves_current_provider_for_soft_accept():
"""The Codex hidden-model soft-accept (#16172 / #19729) is untouched: an
unknown id with no config match stays on the current provider and is
soft-accepted exactly as before."""
result = _run_switch(
raw_input="gpt-5.9-codex-hidden",
current_provider="openai-codex",
current_model="gpt-5.4",
# Config is present but declares an unrelated model — detection is a no-op.
user_providers={
"local-ollama": {
"base_url": "http://localhost:11434/v1",
"models": ["qwen3.5-4b"],
}
},
validation=_CODEX_SOFT_ACCEPT,
)
assert result.success is True, result.error_message
assert result.target_provider == "openai-codex"
assert result.new_model == "gpt-5.9-codex-hidden"
def test_configured_match_is_case_insensitive_and_returns_canonical_spelling():
"""Matching is case-insensitive but the configured spelling wins, so the
downstream validation/override path sees the canonical id."""
user_providers = {
"local-ollama": {
"base_url": "http://localhost:11434/v1",
"models": ["Qwen3.5-4B"],
}
}
result = _run_switch(
raw_input="qwen3.5-4b",
current_provider="openai-codex",
current_model="gpt-5.4",
user_providers=user_providers,
)
assert result.success is True, result.error_message
assert result.target_provider == "local-ollama"
assert result.new_model == "Qwen3.5-4B"
def test_default_model_only_declaration_routes():
"""A model declared ONLY via `default_model` (not in `models`) still routes
to that configured provider (#45006 — default_model is a declaring field)."""
user_providers = {
"local-ollama": {
"name": "Local Ollama",
"base_url": "http://localhost:11434/v1",
"default_model": "qwen3.5-4b",
}
}
result = _run_switch(
raw_input="qwen3.5-4b",
current_provider="openai-codex",
current_model="gpt-5.4",
user_providers=user_providers,
)
assert result.success is True, result.error_message
assert result.target_provider == "local-ollama"
assert result.new_model == "qwen3.5-4b"
def test_malformed_provider_config_does_not_raise():
"""Garbage shapes in provider config must not crash detection — they're
skipped and the typed name falls through to the soft-accept no-op."""
user_providers = {
"bad1": "not-a-dict", # non-dict cfg
"bad2": {"models": 12345}, # models as int
"bad3": {"models": [None, 7, {"noname": "x"}]}, # junk list items
"bad4": {"model": {"k": object()}}, # dict with non-target keys
}
custom_providers = [
"not-a-dict", # non-dict entry
{"name": ""}, # empty name
{"models": ["unrelated-model"]}, # no name key
]
result = _run_switch(
raw_input="gpt-5.9-codex-hidden",
current_provider="openai-codex",
current_model="gpt-5.4",
user_providers=user_providers,
custom_providers=custom_providers,
validation=_CODEX_SOFT_ACCEPT,
)
# No match anywhere -> stays on codex, soft-accepted, no exception.
assert result.success is True, result.error_message
assert result.target_provider == "openai-codex"
def test_xai_oauth_soft_accept_preserved_when_no_match():
"""The xai-oauth hidden-model soft-accept (sibling of openai-codex) is also
a no-op when config declares no matching model."""
user_providers = {
"local-ollama": {"base_url": "http://x/v1", "models": ["some-other-model"]},
}
result = _run_switch(
raw_input="grok-hidden-preview",
current_provider="xai-oauth",
current_model="grok-4",
user_providers=user_providers,
validation=_CODEX_SOFT_ACCEPT,
)
assert result.success is True, result.error_message
assert result.target_provider == "xai-oauth"

View file

@ -907,3 +907,45 @@ class TestNousRecommendedModels:
patch("hermes_cli.models.check_nous_free_tier", side_effect=RuntimeError("boom")),
):
assert get_nous_recommended_aux_model(vision=False) == "paid-model"
class TestCodexSoftAcceptPlausibilityGate:
"""#45006 kernel (b): the openai-codex / xai-oauth hidden-model soft-accept
(#16172 / #19729) must only accept slugs that plausibly belong to that
provider's family. An undeclared, unrelated typed name (e.g. a local model
name) must be REJECTED with actionable --provider guidance instead of being
fake-accepted as a hidden Codex/Grok model (which would 400 on the next turn
and mislabel the provider as 'OpenAI Codex')."""
def test_unrelated_name_rejected_on_openai_codex(self):
from hermes_cli.models import validate_requested_model
r = validate_requested_model("qwen3.5-4b", "openai-codex")
assert r["accepted"] is False
assert r["persist"] is False
assert "--provider" in (r["message"] or "")
def test_unrelated_name_rejected_on_xai_oauth(self):
from hermes_cli.models import validate_requested_model
r = validate_requested_model("llama-3.1-8b", "xai-oauth")
assert r["accepted"] is False
assert "--provider" in (r["message"] or "")
def test_family_shaped_hidden_slug_still_soft_accepted_codex(self):
"""#16172 intent preserved: a gpt-/codex-shaped unknown slug is still
soft-accepted (entitlement-gated hidden models)."""
from hermes_cli.models import validate_requested_model
r = validate_requested_model("gpt-5.9-codex-hidden", "openai-codex")
assert r["accepted"] is True
assert r["recognized"] is False
def test_family_shaped_hidden_slug_still_soft_accepted_xai(self):
from hermes_cli.models import validate_requested_model
r = validate_requested_model("grok-9-hidden", "xai-oauth")
assert r["accepted"] is True
assert r["recognized"] is False
def test_real_catalog_model_unaffected(self):
from hermes_cli.models import validate_requested_model
r = validate_requested_model("gpt-5.5", "openai-codex")
assert r["accepted"] is True
assert r["recognized"] is True