From 39fcf1d12712f5526ebddc9f9ebb075795af73ba Mon Sep 17 00:00:00 2001
From: David VV <david.vv@icloud.com>
Date: Thu, 23 Apr 2026 03:05:12 -0700
Subject: [PATCH] fix(model_switch): group custom_providers by endpoint in
 /model picker (#9210)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Multiple custom_providers entries sharing the same base_url + api_key
are now grouped into a single picker row. A local Ollama host with
per-model display names ("Ollama — GLM 5.1", "Ollama — Qwen3-coder",
"Ollama — Kimi K2", "Ollama — MiniMax M2.7") previously produced four
near-duplicate picker rows that differed only by suffix; now it appears
as one "Ollama" row with four models.

Key changes:
- Grouping key changed from slug-by-name to (base_url, api_key). Names
  frequently differ per model while the endpoint stays the same.
- When the grouped endpoint matches current_base_url, the row's slug is
  set to current_provider so picker-driven switches route through the
  live credential pipeline (no re-resolution needed).
- Per-model suffix is stripped from the display name ("Ollama — X" →
  "Ollama") via em-dash / " - " separators.
- Two groups with different api_keys at the same base_url (or otherwise
  colliding on cleaned name) are disambiguated with a numeric suffix
  (custom:openai, custom:openai-2) so both stay visible.
- current_base_url parameter plumbed through both gateway call sites.

Existing #8216, #11499, #13509 regressions covered (dict/list shapes
of models:, section-3/section-4 dedup, normalized list-format entries).

Salvaged from @davidvv's PR #9210 — the underlying code had diverged
~1400 commits since that PR was opened, so this is a reconstruction of
the same approach on current main rather than a clean cherry-pick.
Authorship preserved via --author on this commit.

Closes #9210
---
 gateway/run.py                                |   2 +
 hermes_cli/model_switch.py                    | 103 +++++++++----
 .../test_model_switch_custom_providers.py     | 145 ++++++++++++++++++
 3 files changed, 223 insertions(+), 27 deletions(-)

diff --git a/gateway/run.py b/gateway/run.py
index a024649cb..0110e8cad 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -5484,6 +5484,7 @@ class GatewayRunner:
                 try:
                     providers = list_authenticated_providers(
                         current_provider=current_provider,
+                        current_base_url=current_base_url,
                         user_providers=user_provs,
                         custom_providers=custom_provs,
                         max_models=50,
@@ -5595,6 +5596,7 @@ class GatewayRunner:
             try:
                 providers = list_authenticated_providers(
                     current_provider=current_provider,
+                    current_base_url=current_base_url,
                     user_providers=user_provs,
                     custom_providers=custom_provs,
                     max_models=5,
diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py
index c3af1eacf..a7c98d9c8 100644
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@@ -782,6 +782,7 @@ def switch_model(
 
 def list_authenticated_providers(
     current_provider: str = "",
+    current_base_url: str = "",
     user_providers: dict = None,
     custom_providers: list | None = None,
     max_models: int = 8,
@@ -1121,66 +1122,113 @@ def list_authenticated_providers(
 
     # --- 4. Saved custom providers from config ---
     # Each ``custom_providers`` entry represents one model under a named
-    # provider. Entries sharing the same provider name are grouped into a
-    # single picker row so that e.g. four Ollama Cloud entries
-    # (qwen3-coder, glm-5.1, kimi-k2, minimax-m2.7) appear as one
-    # "Ollama Cloud" row with four models inside instead of four
-    # duplicate "Ollama Cloud" rows. Entries with distinct provider names
-    # still produce separate rows (e.g. Ollama Cloud vs Moonshot).
+    # provider. Entries sharing the same endpoint (``base_url`` + ``api_key``)
+    # are grouped into a single picker row, so e.g. four Ollama entries
+    # pointing at ``http://localhost:11434/v1`` with per-model display names
+    # ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", ...) appear as one
+    # "Ollama" row with four models inside instead of four near-duplicates
+    # that differ only by suffix. Entries with distinct endpoints still
+    # produce separate rows.
+    #
+    # When the grouped endpoint matches ``current_base_url`` the group's
+    # slug becomes ``current_provider`` so that selecting a model from the
+    # picker flows back through the runtime provider that already holds
+    # valid credentials — no re-resolution needed.
     if custom_providers and isinstance(custom_providers, list):
         from collections import OrderedDict
 
-        groups: "OrderedDict[str, dict]" = OrderedDict()
+        # Key by (base_url, api_key) instead of slug: names frequently
+        # differ per model ("Ollama — X") while the endpoint stays the
+        # same. Slug-based grouping left them as separate rows.
+        groups: "OrderedDict[tuple, dict]" = OrderedDict()
         for entry in custom_providers:
             if not isinstance(entry, dict):
                 continue
 
-            display_name = (entry.get("name") or "").strip()
+            raw_name = (entry.get("name") or "").strip()
             api_url = (
                 entry.get("base_url", "")
                 or entry.get("url", "")
                 or entry.get("api", "")
                 or ""
-            ).strip()
-            if not display_name or not api_url:
+            ).strip().rstrip("/")
+            if not raw_name or not api_url:
                 continue
+            api_key = (entry.get("api_key") or "").strip()
 
-            slug = custom_provider_slug(display_name)
-            if slug not in groups:
-                groups[slug] = {
+            group_key = (api_url, api_key)
+            if group_key not in groups:
+                # Strip per-model suffix so "Ollama — GLM 5.1" becomes
+                # "Ollama" for the grouped row. Em dash is the convention
+                # Hermes's own writer uses; a hyphen variant is accepted
+                # for hand-edited configs.
+                display_name = raw_name
+                for sep in ("—", " - "):
+                    if sep in display_name:
+                        display_name = display_name.split(sep)[0].strip()
+                        break
+                if not display_name:
+                    display_name = raw_name
+                # If this endpoint matches the currently active one, use
+                # ``current_provider`` as the slug so picker-driven switches
+                # route through the live credential pipeline.
+                if (
+                    current_base_url
+                    and api_url == current_base_url.strip().rstrip("/")
+                ):
+                    slug = current_provider or custom_provider_slug(display_name)
+                else:
+                    slug = custom_provider_slug(display_name)
+                groups[group_key] = {
+                    "slug": slug,
                     "name": display_name,
                     "api_url": api_url,
                     "models": [],
                 }
+
             # The singular ``model:`` field only holds the currently
             # active model. Hermes's own writer (main.py::_save_custom_provider)
             # stores every configured model as a dict under ``models:``;
             # downstream readers (agent/models_dev.py, gateway/run.py,
             # run_agent.py, hermes_cli/config.py) already consume that dict.
-            # The /model picker previously ignored it, so multi-model
-            # custom providers appeared to have only the active model.
             default_model = (entry.get("model") or "").strip()
-            if default_model and default_model not in groups[slug]["models"]:
-                groups[slug]["models"].append(default_model)
+            if default_model and default_model not in groups[group_key]["models"]:
+                groups[group_key]["models"].append(default_model)
 
             cfg_models = entry.get("models", {})
             if isinstance(cfg_models, dict):
                 for m in cfg_models:
-                    if m and m not in groups[slug]["models"]:
-                        groups[slug]["models"].append(m)
+                    if m and m not in groups[group_key]["models"]:
+                        groups[group_key]["models"].append(m)
             elif isinstance(cfg_models, list):
                 for m in cfg_models:
-                    if m and m not in groups[slug]["models"]:
-                        groups[slug]["models"].append(m)
+                    if m and m not in groups[group_key]["models"]:
+                        groups[group_key]["models"].append(m)
 
-        for slug, grp in groups.items():
-            if slug.lower() in seen_slugs:
+        _section4_emitted_slugs: set = set()
+        for grp in groups.values():
+            slug = grp["slug"]
+            # If the slug is already claimed by a built-in / overlay /
+            # user-provider row (sections 1-3), skip this custom group
+            # to avoid shadowing a real provider.
+            if slug.lower() in seen_slugs and slug.lower() not in _section4_emitted_slugs:
                 continue
+            # If a prior section-4 group already used this slug (two custom
+            # endpoints with the same cleaned name — e.g. two OpenAI-
+            # compatible gateways named identically with different keys),
+            # append a counter so both rows stay visible in the picker.
+            if slug.lower() in _section4_emitted_slugs:
+                base_slug = slug
+                n = 2
+                while f"{base_slug}-{n}".lower() in seen_slugs:
+                    n += 1
+                slug = f"{base_slug}-{n}"
+                grp["slug"] = slug
             # Skip if section 3 already emitted this endpoint under its
-            # ``providers:`` dict key — matches on (display_name, base_url),
-            # the tuple section 4 groups by.  Prevents two picker rows
-            # labelled identically when callers pass both ``user_providers``
-            # and a compatibility-merged ``custom_providers`` list.
+            # ``providers:`` dict key — matches on (display_name, base_url).
+            # Prevents two picker rows labelled identically when callers
+            # pass both ``user_providers`` and a compatibility-merged
+            # ``custom_providers`` list.
             _pair_key = (
                 str(grp["name"]).strip().lower(),
                 str(grp["api_url"]).strip().rstrip("/").lower(),
@@ -1198,6 +1246,7 @@ def list_authenticated_providers(
                 "api_url": grp["api_url"],
             })
             seen_slugs.add(slug.lower())
+            _section4_emitted_slugs.add(slug.lower())
 
     # Sort: current provider first, then by model count descending
     results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
diff --git a/tests/hermes_cli/test_model_switch_custom_providers.py b/tests/hermes_cli/test_model_switch_custom_providers.py
index 2bd7edbf1..7fc92136a 100644
--- a/tests/hermes_cli/test_model_switch_custom_providers.py
+++ b/tests/hermes_cli/test_model_switch_custom_providers.py
@@ -253,3 +253,148 @@ def test_list_dedupes_dict_model_matching_singular_default(monkeypatch):
     ds_rows = [p for p in providers if p["name"] == "DeepSeek"]
     assert ds_rows[0]["models"].count("deepseek-chat") == 1
     assert ds_rows[0]["models"] == ["deepseek-chat", "deepseek-reasoner"]
+
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# #9210: group custom_providers by (base_url, api_key) in /model picker
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_list_authenticated_providers_groups_same_endpoint(monkeypatch):
+    """Multiple custom_providers entries sharing a base_url+api_key must be
+    returned as a single picker row with all their models merged."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        current_provider="custom",
+        current_base_url="http://localhost:11434/v1",
+        user_providers={},
+        custom_providers=[
+            {"name": "Ollama — MiniMax M2.7", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "minimax-m2.7"},
+            {"name": "Ollama — GLM 5.1",      "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "glm-5.1"},
+            {"name": "Ollama — Qwen3-coder", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "qwen3-coder"},
+        ],
+        max_models=50,
+    )
+
+    custom_groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(custom_groups) == 1, (
+        "Expected 1 group for shared endpoint, got "
+        f"{[p['slug'] for p in custom_groups]}"
+    )
+    group = custom_groups[0]
+    assert set(group["models"]) == {"minimax-m2.7", "glm-5.1", "qwen3-coder"}
+    assert group["total_models"] == 3
+    # Per-model suffix stripped from display name
+    assert group["name"] == "Ollama"
+
+
+def test_list_authenticated_providers_current_endpoint_uses_current_slug(monkeypatch):
+    """When current_base_url matches the grouped endpoint, the slug must
+    equal current_provider so picker selection routes through the live
+    credential pipeline."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        current_provider="custom",
+        current_base_url="http://localhost:11434/v1",
+        user_providers={},
+        custom_providers=[
+            {"name": "Ollama — GLM 5.1", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "glm-5.1"},
+        ],
+        max_models=50,
+    )
+
+    matches = [p for p in providers if p.get("is_user_defined")]
+    assert len(matches) == 1
+    group = matches[0]
+    assert group["slug"] == "custom"
+    assert group["is_current"] is True
+
+
+def test_list_authenticated_providers_distinct_endpoints_stay_separate(monkeypatch):
+    """Entries with different base_urls must produce separate picker rows
+    even if some display names happen to be similar."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        user_providers={},
+        custom_providers=[
+            {"name": "Ollama — GLM 5.1", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "glm-5.1"},
+            {"name": "Moonshot", "base_url": "https://api.moonshot.cn/v1",
+             "api_key": "sk-m", "model": "moonshot-v1"},
+            {"name": "Ollama — Qwen3-coder", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "qwen3-coder"},
+        ],
+        max_models=50,
+    )
+
+    custom_groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(custom_groups) == 2
+    # Ollama endpoint collapses to one row with both models
+    ollama = next(p for p in custom_groups if p["name"] == "Ollama")
+    assert set(ollama["models"]) == {"glm-5.1", "qwen3-coder"}
+    moonshot = next(p for p in custom_groups if p["name"] == "Moonshot")
+    assert moonshot["models"] == ["moonshot-v1"]
+
+
+def test_list_authenticated_providers_same_url_different_keys_disambiguated(monkeypatch):
+    """Two custom_providers entries with the same base_url but different
+    api_keys (and identical cleaned names) must both stay visible in the
+    picker — slug is suffixed to disambiguate."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        user_providers={},
+        custom_providers=[
+            {"name": "OpenAI — key A", "base_url": "https://api.openai.com/v1",
+             "api_key": "sk-AAA", "model": "gpt-5.4"},
+            {"name": "OpenAI — key B", "base_url": "https://api.openai.com/v1",
+             "api_key": "sk-BBB", "model": "gpt-4.6"},
+        ],
+        max_models=50,
+    )
+
+    custom_groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(custom_groups) == 2
+    slugs = sorted(p["slug"] for p in custom_groups)
+    # First group keeps the base slug, second gets a numeric suffix
+    assert slugs == ["custom:openai", "custom:openai-2"]
+    # Each row has a distinct model
+    models = {p["slug"]: p["models"] for p in custom_groups}
+    assert models["custom:openai"] == ["gpt-5.4"]
+    assert models["custom:openai-2"] == ["gpt-4.6"]
+
+
+def test_list_authenticated_providers_total_models_reflects_grouped_count(monkeypatch):
+    """After grouping six entries into one row, total_models must reflect
+    the full count, and every grouped model appears in the list."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    entries = [
+        {"name": f"Ollama \u2014 Model {i}", "base_url": "http://localhost:11434/v1",
+         "api_key": "ollama", "model": f"model-{i}"}
+        for i in range(6)
+    ]
+    providers = list_authenticated_providers(
+        user_providers={},
+        custom_providers=entries,
+        max_models=4,
+    )
+
+    groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(groups) == 1
+    group = groups[0]
+    assert group["total_models"] == 6
+    # All six models are preserved in the grouped row.
+    assert sorted(group["models"]) == sorted(f"model-{i}" for i in range(6))