fix(model): preserve custom endpoint credentials during /model switch

Named custom providers (ollama-launch, lmstudio, etc.) resolve to provider="custom" via _resolve_named_custom_runtime(). When switch_model() re-resolves credentials for "custom", _get_named_custom_provider("custom") rejects the bare name and the fallback chain reaches OpenRouter — silently changing the endpoint. Detect custom/local endpoints and skip credential re-resolution when the provider hasn't changed. The existing api_key and base_url from the live agent are already correct. Reproduced: ollama launch hermes → /model gemma3:4b → probed OpenRouter instead of localhost:11434. After fix: model switch stays on local Ollama.
2026-04-25 00:51:20 +00:00 · 2026-04-24 16:39:12 +05:30 · 2026-04-24 16:39:12 +05:30 · ca8df75626
commit ca8df75626
parent 3a86f70969
2 changed files with 82 additions and 4 deletions
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@ -770,6 +770,7 @@ def switch_model(
    api_mode = ""
    if provider_changed or explicit_provider:
        # Switching providers — resolve fresh credentials for the target.
        try:
            runtime = resolve_runtime_provider(requested=target_provider)
            api_key = runtime.get("api_key", "")
@ -787,13 +788,41 @@ def switch_model(
                ),
            )
    else:
        # Same provider, different model.  Re-resolve to pick up credential
        # rotation and provider-specific base_url adjustments (e.g. OpenCode
        # strips /v1 for Anthropic models, restores it for chat_completions).
        # However, re-resolution fails for custom/local endpoints: the
        # runtime resolver maps named providers (e.g. "ollama-launch") to the
        # generic "custom" label, which cannot round-trip and falls through to
        # OpenRouter.  Detect this and keep the caller's existing credentials.
        _keep_existing = False
        try:
            runtime = resolve_runtime_provider(requested=current_provider)
-            api_key = runtime.get("api_key", "")
+            _resolved_base = (runtime.get("base_url") or "").rstrip("/")
-            base_url = runtime.get("base_url", "")
+            _current_base = (current_base_url or "").rstrip("/")
            # Guard: for custom/local providers, if re-resolution produced a
            # different base_url it means the resolver couldn't find the
            # original endpoint and fell through (e.g. "custom" → OpenRouter).
            # Known providers (opencode, openrouter, etc.) may legitimately
            # return a different base_url, so only guard "custom"/"local".
            if (
                current_provider in ("custom", "local")
                and _current_base
                and _resolved_base != _current_base
            ):
                _keep_existing = True
            else:
                api_key = runtime.get("api_key", "") or api_key
                base_url = runtime.get("base_url", "") or base_url
                api_mode = runtime.get("api_mode", "")
        except Exception:
-            pass
+            _keep_existing = True
        if _keep_existing:
            # Resolution failed or fell through — keep the caller's
            # known-good credentials and detect api_mode from the URL.
            from hermes_cli.runtime_provider import _detect_api_mode_for_url
            api_mode = _detect_api_mode_for_url(base_url) or "chat_completions"
    # --- Direct alias override: use exact base_url from the alias if set ---
    if resolved_alias:
@ -824,6 +853,39 @@ def switch_model(
            "message": f"Could not validate `{new_model}`: {e}",
        }
    # Fallback: if the /v1/models probe rejected the model but the config's
    # provider entry lists it, accept anyway.  This covers models that the
    # endpoint serves but doesn't advertise (e.g. Ollama cloud models like
    # "kimi-k2.5:cloud" work via cloud routing but don't appear in the local
    # /v1/models response).
    if not validation.get("accepted") and target_provider in ("custom", "local"):
        _config_models: set = set()
        if user_providers and isinstance(user_providers, dict):
            for _ep_cfg in user_providers.values():
                if isinstance(_ep_cfg, dict):
                    _dm = _ep_cfg.get("default_model", "")
                    if _dm:
                        _config_models.add(_dm)
                    _ml = _ep_cfg.get("models", [])
                    if isinstance(_ml, (list, dict)):
                        _config_models.update(m for m in _ml if m)
        if custom_providers and isinstance(custom_providers, list):
            for _cp in custom_providers:
                if isinstance(_cp, dict):
                    _dm = _cp.get("model", "")
                    if _dm:
                        _config_models.add(_dm)
                    _ml = _cp.get("models", [])
                    if isinstance(_ml, (list, dict)):
                        _config_models.update(m for m in _ml if m)
        if new_model in _config_models:
            validation = {
                "accepted": True,
                "persist": True,
                "recognized": True,
                "message": None,
            }
    if not validation.get("accepted"):
        msg = validation.get("message", "Invalid model")
        return ModelSwitchResult(
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -667,6 +667,20 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
        current_base_url = str(runtime.get("base_url", "") or "")
        current_api_key = str(runtime.get("api_key", "") or "")
    # Load user-defined providers from config so switch_model can resolve
    # named custom endpoints (e.g. "ollama-launch") and use the config's
    # model list as a validation fallback for models the /v1/models probe
    # doesn't advertise (e.g. Ollama cloud models).  Parity with cli.py's
    # _handle_model_switch which passes the same two fields.
    cfg = _load_cfg()
    user_provs = cfg.get("providers") if isinstance(cfg.get("providers"), dict) else None
    custom_provs = None
    try:
        from hermes_cli.config import get_compatible_custom_providers
        custom_provs = get_compatible_custom_providers(cfg)
    except Exception:
        pass
    result = switch_model(
        raw_input=model_input,
        current_provider=current_provider,
@ -675,6 +689,8 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
        current_api_key=current_api_key,
        is_global=persist_global,
        explicit_provider=explicit_provider,
        user_providers=user_provs,
        custom_providers=custom_provs,
    )
    if not result.success:
        raise ValueError(result.error_message or "model switch failed")