From 0507e4630dd7eb66465008eeb6045ec913f9c3ad Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 7 Jun 2026 02:48:21 -0700 Subject: [PATCH] fix(desktop): preserve configured base_url on same-provider model switch (#41121) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The desktop model picker calls POST /api/model/set with provider+model only (no base_url). _apply_main_model_assignment cleared model.base_url for every non-custom provider, so re-picking a Xiaomi MiMo model wiped a Token Plan endpoint (https://token-plan-*.xiaomimimo.com/v1) back to the registry default api.xiaomimimo.com — breaking valid tp- keys with 401s. Now base_url is cleared only when switching to a different provider (the stale URL belonged to the old one); same-provider re-assignment preserves it, and an explicitly supplied base_url is honored for any provider. --- hermes_cli/web_server.py | 36 +++++++++++---- tests/hermes_cli/test_web_server.py | 68 +++++++++++++++++++++++++---- 2 files changed, 87 insertions(+), 17 deletions(-) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 9dc3262e16f..dca6984716e 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -692,23 +692,41 @@ def _apply_main_model_assignment( ) -> dict: """Apply a main-slot model assignment to a ``model`` config dict in place. - Sets ``provider``/``default``, then reconciles ``base_url``: custom/local - providers persist the supplied endpoint URL (the runtime resolver reads - ``model.base_url`` from config and ignores ``OPENAI_BASE_URL``), while every - other provider clears any stale URL so the resolver picks that provider's - own default endpoint. The hardcoded ``context_length`` override is always - dropped since the new model may have a different context window. + Sets ``provider``/``default``, then reconciles ``base_url``: + + - An explicitly supplied ``base_url`` is always persisted (covers + ``custom``/local endpoints and any provider whose key is bound to a + non-default host). + - Otherwise, a stale ``base_url`` is cleared ONLY when switching to a + *different* provider — that URL belonged to the old provider. When the + provider is unchanged and no new URL is supplied, the existing + ``base_url`` is preserved. This keeps a user's custom endpoint (e.g. a + Xiaomi MiMo Token Plan host, ``https://token-plan-*.xiaomimimo.com/v1``) + alive when they merely re-pick a model under the same provider — picking + a model previously wiped it, forcing the registry default and breaking + Token Plan keys. + + The runtime resolver reads ``model.base_url`` from config (it ignores + ``OPENAI_BASE_URL``) and only honors it when the configured provider matches + and the pool entry is on the registry default, so preserving it here is what + lets the override actually route. The hardcoded ``context_length`` override + is always dropped since the new model may have a different context window. Returns the same dict (coerced to a fresh dict if the input wasn't one) so - callers can assign it straight back onto ``cfg["model"]``. + callers can assign it straight back onto the model config. """ if not isinstance(model_cfg, dict): model_cfg = {} + prev_provider = str(model_cfg.get("provider") or "").strip().lower() + new_provider = provider.strip().lower() model_cfg["provider"] = provider model_cfg["default"] = model - if provider.strip().lower() == "custom" and base_url.strip(): + if base_url.strip(): model_cfg["base_url"] = base_url.strip() - elif model_cfg.get("base_url"): + elif model_cfg.get("base_url") and new_provider != prev_provider: + # Switching providers: the old URL belonged to the old provider, drop + # it so the new provider's default endpoint is used. Same-provider + # re-assignment keeps the user's configured base_url intact. model_cfg["base_url"] = "" model_cfg.pop("context_length", None) return model_cfg diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py index bb3085eff22..527d0939a38 100644 --- a/tests/hermes_cli/test_web_server.py +++ b/tests/hermes_cli/test_web_server.py @@ -1269,10 +1269,11 @@ class TestWebServerEndpoints: assert data.get("gateway_tools", []) == [] def test_apply_main_model_assignment_base_url_and_context_reconcile(self): - """The shared main-slot assignment helper must persist base_url only for - custom providers, clear stale base_url for hosted ones, and always drop - a hardcoded context_length override. Both POST /api/model/set and - profile-model writes route through this, so the contract is pinned here.""" + """The shared main-slot assignment helper must persist a supplied + base_url, clear a stale base_url only when switching providers, preserve + it on same-provider re-assignment, and always drop a hardcoded + context_length override. Both POST /api/model/set and profile-model + writes route through this, so the contract is pinned here.""" from hermes_cli.web_server import _apply_main_model_assignment # Custom + base_url → persisted; stale context_length dropped. @@ -1284,16 +1285,39 @@ class TestWebServerEndpoints: assert out["base_url"] == "http://127.0.0.1:8000/v1" assert "context_length" not in out - # Hosted provider → stale base_url cleared (no base_url supplied). + # Switching providers (custom → openrouter) → stale base_url cleared. out = _apply_main_model_assignment( - {"base_url": "http://127.0.0.1:8000/v1"}, "openrouter", "anthropic/claude-opus-4.8" + {"provider": "custom", "base_url": "http://127.0.0.1:8000/v1"}, + "openrouter", + "anthropic/claude-opus-4.8", ) assert out["provider"] == "openrouter" assert out["base_url"] == "" - # Custom WITHOUT a base_url → don't invent one, clear any stale value. + # Same provider, no new base_url → existing custom endpoint preserved. + # Regression: picking a different MiMo model under xiaomi must NOT wipe a + # Token Plan base_url (https://token-plan-*.xiaomimimo.com/v1). out = _apply_main_model_assignment( - {"base_url": "http://stale:1/v1"}, "custom", "m" + {"provider": "xiaomi", "base_url": "https://token-plan-ams.xiaomimimo.com/v1"}, + "xiaomi", + "mimo-v2.5-pro", + ) + assert out["provider"] == "xiaomi" + assert out["default"] == "mimo-v2.5-pro" + assert out["base_url"] == "https://token-plan-ams.xiaomimimo.com/v1" + + # A supplied base_url is honored for any provider, not just custom. + out = _apply_main_model_assignment( + {"provider": "xiaomi"}, + "xiaomi", + "mimo-v2.5", + "https://token-plan-cn.xiaomimimo.com/v1", + ) + assert out["base_url"] == "https://token-plan-cn.xiaomimimo.com/v1" + + # Switching providers without a base_url → don't invent one, clear stale. + out = _apply_main_model_assignment( + {"provider": "openrouter", "base_url": "http://stale:1/v1"}, "custom", "m" ) assert out["base_url"] == "" @@ -1377,6 +1401,34 @@ class TestWebServerEndpoints: assert resp.status_code == 200 assert resp.json()["base_url"] == "" + def test_set_model_main_same_provider_preserves_base_url(self): + """Re-picking a model under the SAME provider must NOT wipe a configured + base_url. Regression for the desktop bug where selecting a Xiaomi MiMo + model reset a Token Plan endpoint back to the registry default, breaking + Token Plan keys (https://token-plan-*.xiaomimimo.com/v1).""" + from hermes_cli.config import load_config, save_config + + cfg = load_config() + cfg["model"] = { + "provider": "xiaomi", + "default": "mimo-v2.5-pro", + "base_url": "https://token-plan-ams.xiaomimimo.com/v1", + } + save_config(cfg) + + # Desktop model picker sends provider+model only (no base_url). + resp = self.client.post( + "/api/model/set", + json={"scope": "main", "provider": "xiaomi", "model": "mimo-v2.5"}, + ) + assert resp.status_code == 200 + assert resp.json()["base_url"] == "https://token-plan-ams.xiaomimimo.com/v1" + + model_cfg = load_config().get("model") + assert isinstance(model_cfg, dict) + assert model_cfg["default"] == "mimo-v2.5" + assert model_cfg["base_url"] == "https://token-plan-ams.xiaomimimo.com/v1" + def test_set_model_main_reports_stale_auxiliary_pins(self): """Switching the main provider must report auxiliary slots still pinned to a *different* provider so the UI can warn the user their helper tasks