fix(model): preserve custom endpoint credentials during /model switch

Named custom providers (ollama-launch, lmstudio, etc.) resolve to provider="custom" via _resolve_named_custom_runtime(). When switch_model() re-resolves credentials for "custom", _get_named_custom_provider("custom") rejects the bare name and the fallback chain reaches OpenRouter — silently changing the endpoint. Detect custom/local endpoints and skip credential re-resolution when the provider hasn't changed. The existing api_key and base_url from the live agent are already correct. Reproduced: ollama launch hermes → /model gemma3:4b → probed OpenRouter instead of localhost:11434. After fix: model switch stays on local Ollama.
2026-04-25 00:51:20 +00:00 · 2026-04-24 16:39:12 +05:30 · 2026-04-24 16:39:12 +05:30 · ca8df75626
commit ca8df75626
parent 3a86f70969
2 changed files with 82 additions and 4 deletions
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@ -770,6 +770,7 @@ def switch_model(
    api_mode = ""

    if provider_changed or explicit_provider:
+        # Switching providers — resolve fresh credentials for the target.
        try:
            runtime = resolve_runtime_provider(requested=target_provider)
            api_key = runtime.get("api_key", "")
@ -787,13 +788,41 @@ def switch_model(
                ),
            )
    else:
+        # Same provider, different model.  Re-resolve to pick up credential
+        # rotation and provider-specific base_url adjustments (e.g. OpenCode
+        # strips /v1 for Anthropic models, restores it for chat_completions).
+        # However, re-resolution fails for custom/local endpoints: the
+        # runtime resolver maps named providers (e.g. "ollama-launch") to the
+        # generic "custom" label, which cannot round-trip and falls through to
+        # OpenRouter.  Detect this and keep the caller's existing credentials.
+        _keep_existing = False
        try:
            runtime = resolve_runtime_provider(requested=current_provider)
-            api_key = runtime.get("api_key", "")
-            base_url = runtime.get("base_url", "")
+            _resolved_base = (runtime.get("base_url") or "").rstrip("/")
+            _current_base = (current_base_url or "").rstrip("/")
+            # Guard: for custom/local providers, if re-resolution produced a
+            # different base_url it means the resolver couldn't find the
+            # original endpoint and fell through (e.g. "custom" → OpenRouter).
+            # Known providers (opencode, openrouter, etc.) may legitimately
+            # return a different base_url, so only guard "custom"/"local".
+            if (
+                current_provider in ("custom", "local")
+                and _current_base
+                and _resolved_base != _current_base
+            ):
+                _keep_existing = True
+            else:
+                api_key = runtime.get("api_key", "") or api_key
+                base_url = runtime.get("base_url", "") or base_url
                api_mode = runtime.get("api_mode", "")
        except Exception:
-            pass
+            _keep_existing = True
+
+        if _keep_existing:
+            # Resolution failed or fell through — keep the caller's
+            # known-good credentials and detect api_mode from the URL.
+            from hermes_cli.runtime_provider import _detect_api_mode_for_url
+            api_mode = _detect_api_mode_for_url(base_url) or "chat_completions"

    # --- Direct alias override: use exact base_url from the alias if set ---
    if resolved_alias:
@ -824,6 +853,39 @@ def switch_model(
            "message": f"Could not validate `{new_model}`: {e}",
        }

+    # Fallback: if the /v1/models probe rejected the model but the config's
+    # provider entry lists it, accept anyway.  This covers models that the
+    # endpoint serves but doesn't advertise (e.g. Ollama cloud models like
+    # "kimi-k2.5:cloud" work via cloud routing but don't appear in the local
+    # /v1/models response).
+    if not validation.get("accepted") and target_provider in ("custom", "local"):
+        _config_models: set = set()
+        if user_providers and isinstance(user_providers, dict):
+            for _ep_cfg in user_providers.values():
+                if isinstance(_ep_cfg, dict):
+                    _dm = _ep_cfg.get("default_model", "")
+                    if _dm:
+                        _config_models.add(_dm)
+                    _ml = _ep_cfg.get("models", [])
+                    if isinstance(_ml, (list, dict)):
+                        _config_models.update(m for m in _ml if m)
+        if custom_providers and isinstance(custom_providers, list):
+            for _cp in custom_providers:
+                if isinstance(_cp, dict):
+                    _dm = _cp.get("model", "")
+                    if _dm:
+                        _config_models.add(_dm)
+                    _ml = _cp.get("models", [])
+                    if isinstance(_ml, (list, dict)):
+                        _config_models.update(m for m in _ml if m)
+        if new_model in _config_models:
+            validation = {
+                "accepted": True,
+                "persist": True,
+                "recognized": True,
+                "message": None,
+            }
+
    if not validation.get("accepted"):
        msg = validation.get("message", "Invalid model")
        return ModelSwitchResult(
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -667,6 +667,20 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
        current_base_url = str(runtime.get("base_url", "") or "")
        current_api_key = str(runtime.get("api_key", "") or "")

+    # Load user-defined providers from config so switch_model can resolve
+    # named custom endpoints (e.g. "ollama-launch") and use the config's
+    # model list as a validation fallback for models the /v1/models probe
+    # doesn't advertise (e.g. Ollama cloud models).  Parity with cli.py's
+    # _handle_model_switch which passes the same two fields.
+    cfg = _load_cfg()
+    user_provs = cfg.get("providers") if isinstance(cfg.get("providers"), dict) else None
+    custom_provs = None
+    try:
+        from hermes_cli.config import get_compatible_custom_providers
+        custom_provs = get_compatible_custom_providers(cfg)
+    except Exception:
+        pass
+
    result = switch_model(
        raw_input=model_input,
        current_provider=current_provider,
@ -675,6 +689,8 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
        current_api_key=current_api_key,
        is_global=persist_global,
        explicit_provider=explicit_provider,
+        user_providers=user_provs,
+        custom_providers=custom_provs,
    )
    if not result.success:
        raise ValueError(result.error_message or "model switch failed")