fix(model_switch): group custom_providers by endpoint in /model picker (#9210)

Multiple custom_providers entries sharing the same base_url + api_key are now grouped into a single picker row. A local Ollama host with per-model display names ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", "Ollama — Kimi K2", "Ollama — MiniMax M2.7") previously produced four near-duplicate picker rows that differed only by suffix; now it appears as one "Ollama" row with four models. Key changes: - Grouping key changed from slug-by-name to (base_url, api_key). Names frequently differ per model while the endpoint stays the same. - When the grouped endpoint matches current_base_url, the row's slug is set to current_provider so picker-driven switches route through the live credential pipeline (no re-resolution needed). - Per-model suffix is stripped from the display name ("Ollama — X" → "Ollama") via em-dash / " - " separators. - Two groups with different api_keys at the same base_url (or otherwise colliding on cleaned name) are disambiguated with a numeric suffix (custom:openai, custom:openai-2) so both stay visible. - current_base_url parameter plumbed through both gateway call sites. Existing #8216, #11499, #13509 regressions covered (dict/list shapes of models:, section-3/section-4 dedup, normalized list-format entries). Salvaged from @davidvv's PR #9210 — the underlying code had diverged ~1400 commits since that PR was opened, so this is a reconstruction of the same approach on current main rather than a clean cherry-pick. Authorship preserved via --author on this commit. Closes #9210
2026-04-25 00:51:20 +00:00 · 2026-04-23 03:05:12 -07:00 · 2026-04-23 03:05:12 -07:00 · 39fcf1d127
commit 39fcf1d127
parent 6172f95944
3 changed files with 223 additions and 27 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@ -5484,6 +5484,7 @@ class GatewayRunner:
                try:
                    providers = list_authenticated_providers(
                        current_provider=current_provider,
+                        current_base_url=current_base_url,
                        user_providers=user_provs,
                        custom_providers=custom_provs,
                        max_models=50,
@ -5595,6 +5596,7 @@ class GatewayRunner:
            try:
                providers = list_authenticated_providers(
                    current_provider=current_provider,
+                    current_base_url=current_base_url,
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=5,
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@ -782,6 +782,7 @@ def switch_model(

 def list_authenticated_providers(
    current_provider: str = "",
+    current_base_url: str = "",
    user_providers: dict = None,
    custom_providers: list | None = None,
    max_models: int = 8,
@ -1121,66 +1122,113 @@ def list_authenticated_providers(

    # --- 4. Saved custom providers from config ---
    # Each ``custom_providers`` entry represents one model under a named
-    # provider. Entries sharing the same provider name are grouped into a
-    # single picker row so that e.g. four Ollama Cloud entries
-    # (qwen3-coder, glm-5.1, kimi-k2, minimax-m2.7) appear as one
-    # "Ollama Cloud" row with four models inside instead of four
-    # duplicate "Ollama Cloud" rows. Entries with distinct provider names
-    # still produce separate rows (e.g. Ollama Cloud vs Moonshot).
+    # provider. Entries sharing the same endpoint (``base_url`` + ``api_key``)
+    # are grouped into a single picker row, so e.g. four Ollama entries
+    # pointing at ``http://localhost:11434/v1`` with per-model display names
+    # ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", ...) appear as one
+    # "Ollama" row with four models inside instead of four near-duplicates
+    # that differ only by suffix. Entries with distinct endpoints still
+    # produce separate rows.
+    #
+    # When the grouped endpoint matches ``current_base_url`` the group's
+    # slug becomes ``current_provider`` so that selecting a model from the
+    # picker flows back through the runtime provider that already holds
+    # valid credentials — no re-resolution needed.
    if custom_providers and isinstance(custom_providers, list):
        from collections import OrderedDict

-        groups: "OrderedDict[str, dict]" = OrderedDict()
+        # Key by (base_url, api_key) instead of slug: names frequently
+        # differ per model ("Ollama — X") while the endpoint stays the
+        # same. Slug-based grouping left them as separate rows.
+        groups: "OrderedDict[tuple, dict]" = OrderedDict()
        for entry in custom_providers:
            if not isinstance(entry, dict):
                continue

-            display_name = (entry.get("name") or "").strip()
+            raw_name = (entry.get("name") or "").strip()
            api_url = (
                entry.get("base_url", "")
                or entry.get("url", "")
                or entry.get("api", "")
                or ""
-            ).strip()
-            if not display_name or not api_url:
+            ).strip().rstrip("/")
+            if not raw_name or not api_url:
                continue
+            api_key = (entry.get("api_key") or "").strip()

-            slug = custom_provider_slug(display_name)
-            if slug not in groups:
-                groups[slug] = {
+            group_key = (api_url, api_key)
+            if group_key not in groups:
+                # Strip per-model suffix so "Ollama — GLM 5.1" becomes
+                # "Ollama" for the grouped row. Em dash is the convention
+                # Hermes's own writer uses; a hyphen variant is accepted
+                # for hand-edited configs.
+                display_name = raw_name
+                for sep in ("—", " - "):
+                    if sep in display_name:
+                        display_name = display_name.split(sep)[0].strip()
+                        break
+                if not display_name:
+                    display_name = raw_name
+                # If this endpoint matches the currently active one, use
+                # ``current_provider`` as the slug so picker-driven switches
+                # route through the live credential pipeline.
+                if (
+                    current_base_url
+                    and api_url == current_base_url.strip().rstrip("/")
+                ):
+                    slug = current_provider or custom_provider_slug(display_name)
+                else:
+                    slug = custom_provider_slug(display_name)
+                groups[group_key] = {
+                    "slug": slug,
                    "name": display_name,
                    "api_url": api_url,
                    "models": [],
                }
+
            # The singular ``model:`` field only holds the currently
            # active model. Hermes's own writer (main.py::_save_custom_provider)
            # stores every configured model as a dict under ``models:``;
            # downstream readers (agent/models_dev.py, gateway/run.py,
            # run_agent.py, hermes_cli/config.py) already consume that dict.
-            # The /model picker previously ignored it, so multi-model
-            # custom providers appeared to have only the active model.
            default_model = (entry.get("model") or "").strip()
-            if default_model and default_model not in groups[slug]["models"]:
-                groups[slug]["models"].append(default_model)
+            if default_model and default_model not in groups[group_key]["models"]:
+                groups[group_key]["models"].append(default_model)

            cfg_models = entry.get("models", {})
            if isinstance(cfg_models, dict):
                for m in cfg_models:
-                    if m and m not in groups[slug]["models"]:
-                        groups[slug]["models"].append(m)
+                    if m and m not in groups[group_key]["models"]:
+                        groups[group_key]["models"].append(m)
            elif isinstance(cfg_models, list):
                for m in cfg_models:
-                    if m and m not in groups[slug]["models"]:
-                        groups[slug]["models"].append(m)
+                    if m and m not in groups[group_key]["models"]:
+                        groups[group_key]["models"].append(m)

-        for slug, grp in groups.items():
-            if slug.lower() in seen_slugs:
+        _section4_emitted_slugs: set = set()
+        for grp in groups.values():
+            slug = grp["slug"]
+            # If the slug is already claimed by a built-in / overlay /
+            # user-provider row (sections 1-3), skip this custom group
+            # to avoid shadowing a real provider.
+            if slug.lower() in seen_slugs and slug.lower() not in _section4_emitted_slugs:
                continue
+            # If a prior section-4 group already used this slug (two custom
+            # endpoints with the same cleaned name — e.g. two OpenAI-
+            # compatible gateways named identically with different keys),
+            # append a counter so both rows stay visible in the picker.
+            if slug.lower() in _section4_emitted_slugs:
+                base_slug = slug
+                n = 2
+                while f"{base_slug}-{n}".lower() in seen_slugs:
+                    n += 1
+                slug = f"{base_slug}-{n}"
+                grp["slug"] = slug
            # Skip if section 3 already emitted this endpoint under its
-            # ``providers:`` dict key — matches on (display_name, base_url),
-            # the tuple section 4 groups by.  Prevents two picker rows
-            # labelled identically when callers pass both ``user_providers``
-            # and a compatibility-merged ``custom_providers`` list.
+            # ``providers:`` dict key — matches on (display_name, base_url).
+            # Prevents two picker rows labelled identically when callers
+            # pass both ``user_providers`` and a compatibility-merged
+            # ``custom_providers`` list.
            _pair_key = (
                str(grp["name"]).strip().lower(),
                str(grp["api_url"]).strip().rstrip("/").lower(),
@ -1198,6 +1246,7 @@ def list_authenticated_providers(
                "api_url": grp["api_url"],
            })
            seen_slugs.add(slug.lower())
+            _section4_emitted_slugs.add(slug.lower())

    # Sort: current provider first, then by model count descending
    results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
--- a/tests/hermes_cli/test_model_switch_custom_providers.py
+++ b/tests/hermes_cli/test_model_switch_custom_providers.py
@ -253,3 +253,148 @@ def test_list_dedupes_dict_model_matching_singular_default(monkeypatch):
    ds_rows = [p for p in providers if p["name"] == "DeepSeek"]
    assert ds_rows[0]["models"].count("deepseek-chat") == 1
    assert ds_rows[0]["models"] == ["deepseek-chat", "deepseek-reasoner"]
+
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# #9210: group custom_providers by (base_url, api_key) in /model picker
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_list_authenticated_providers_groups_same_endpoint(monkeypatch):
+    """Multiple custom_providers entries sharing a base_url+api_key must be
+    returned as a single picker row with all their models merged."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        current_provider="custom",
+        current_base_url="http://localhost:11434/v1",
+        user_providers={},
+        custom_providers=[
+            {"name": "Ollama — MiniMax M2.7", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "minimax-m2.7"},
+            {"name": "Ollama — GLM 5.1",      "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "glm-5.1"},
+            {"name": "Ollama — Qwen3-coder", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "qwen3-coder"},
+        ],
+        max_models=50,
+    )
+
+    custom_groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(custom_groups) == 1, (
+        "Expected 1 group for shared endpoint, got "
+        f"{[p['slug'] for p in custom_groups]}"
+    )
+    group = custom_groups[0]
+    assert set(group["models"]) == {"minimax-m2.7", "glm-5.1", "qwen3-coder"}
+    assert group["total_models"] == 3
+    # Per-model suffix stripped from display name
+    assert group["name"] == "Ollama"
+
+
+def test_list_authenticated_providers_current_endpoint_uses_current_slug(monkeypatch):
+    """When current_base_url matches the grouped endpoint, the slug must
+    equal current_provider so picker selection routes through the live
+    credential pipeline."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        current_provider="custom",
+        current_base_url="http://localhost:11434/v1",
+        user_providers={},
+        custom_providers=[
+            {"name": "Ollama — GLM 5.1", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "glm-5.1"},
+        ],
+        max_models=50,
+    )
+
+    matches = [p for p in providers if p.get("is_user_defined")]
+    assert len(matches) == 1
+    group = matches[0]
+    assert group["slug"] == "custom"
+    assert group["is_current"] is True
+
+
+def test_list_authenticated_providers_distinct_endpoints_stay_separate(monkeypatch):
+    """Entries with different base_urls must produce separate picker rows
+    even if some display names happen to be similar."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        user_providers={},
+        custom_providers=[
+            {"name": "Ollama — GLM 5.1", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "glm-5.1"},
+            {"name": "Moonshot", "base_url": "https://api.moonshot.cn/v1",
+             "api_key": "sk-m", "model": "moonshot-v1"},
+            {"name": "Ollama — Qwen3-coder", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "qwen3-coder"},
+        ],
+        max_models=50,
+    )
+
+    custom_groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(custom_groups) == 2
+    # Ollama endpoint collapses to one row with both models
+    ollama = next(p for p in custom_groups if p["name"] == "Ollama")
+    assert set(ollama["models"]) == {"glm-5.1", "qwen3-coder"}
+    moonshot = next(p for p in custom_groups if p["name"] == "Moonshot")
+    assert moonshot["models"] == ["moonshot-v1"]
+
+
+def test_list_authenticated_providers_same_url_different_keys_disambiguated(monkeypatch):
+    """Two custom_providers entries with the same base_url but different
+    api_keys (and identical cleaned names) must both stay visible in the
+    picker — slug is suffixed to disambiguate."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        user_providers={},
+        custom_providers=[
+            {"name": "OpenAI — key A", "base_url": "https://api.openai.com/v1",
+             "api_key": "sk-AAA", "model": "gpt-5.4"},
+            {"name": "OpenAI — key B", "base_url": "https://api.openai.com/v1",
+             "api_key": "sk-BBB", "model": "gpt-4.6"},
+        ],
+        max_models=50,
+    )
+
+    custom_groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(custom_groups) == 2
+    slugs = sorted(p["slug"] for p in custom_groups)
+    # First group keeps the base slug, second gets a numeric suffix
+    assert slugs == ["custom:openai", "custom:openai-2"]
+    # Each row has a distinct model
+    models = {p["slug"]: p["models"] for p in custom_groups}
+    assert models["custom:openai"] == ["gpt-5.4"]
+    assert models["custom:openai-2"] == ["gpt-4.6"]
+
+
+def test_list_authenticated_providers_total_models_reflects_grouped_count(monkeypatch):
+    """After grouping six entries into one row, total_models must reflect
+    the full count, and every grouped model appears in the list."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    entries = [
+        {"name": f"Ollama \u2014 Model {i}", "base_url": "http://localhost:11434/v1",
+         "api_key": "ollama", "model": f"model-{i}"}
+        for i in range(6)
+    ]
+    providers = list_authenticated_providers(
+        user_providers={},
+        custom_providers=entries,
+        max_models=4,
+    )
+
+    groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(groups) == 1
+    group = groups[0]
+    assert group["total_models"] == 6
+    # All six models are preserved in the grouped row.
+    assert sorted(group["models"]) == sorted(f"model-{i}" for i in range(6))