From 70f798043b65b003345803f95ffaa958d43465ac Mon Sep 17 00:00:00 2001
From: LucidPaths <LucidPaths@users.noreply.github.com>
Date: Sun, 5 Apr 2026 10:58:44 -0700
Subject: [PATCH] fix: Ollama Cloud auth, /model switch persistence, and alias
 tab completion

- Add OLLAMA_API_KEY to credential resolution chain for ollama.com endpoints
- Update requested_provider/_explicit_api_key/_explicit_base_url after /model
  switch so _ensure_runtime_credentials() doesn't revert the switch
- Pass base_url/api_key from fallback config to resolve_provider_client()
- Add DirectAlias system: user-configurable model_aliases in config.yaml
  checked before catalog resolution, with reverse lookup by model ID
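- Add /model tab completion showing aliases with provider metadata
- After /model, prepend a one-off "[Note: model was just switched ...]" line to
  the next user message (CLI and gateway) so the model knows about the switch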

Co-authored-by: LucidPaths <LucidPaths@users.noreply.github.com>
---
 cli-config.yaml.example         |  27 ++
 cli.py                          |  21 +-
 gateway/run.py                  |  16 +
 hermes_cli/commands.py          |  38 ++
 hermes_cli/model_switch.py      |  88 +++++
 hermes_cli/runtime_provider.py  |   4 +
 run_agent.py                    |  13 +-
 tests/test_ollama_cloud_auth.py | 657 ++++++++++++++++++++++++++++++++
 8 files changed, 862 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_ollama_cloud_auth.py

diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index f43b90838..6b1809273 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -34,6 +34,12 @@ model:
   #     base_url: "http://localhost:1234/v1"
   #   No API key needed — local servers typically ignore auth.
   #
+  #   For Ollama Cloud (https://ollama.com/pricing):
+  #     provider: "custom"
+  #     base_url: "https://ollama.com/v1"
+  #   Set OLLAMA_API_KEY in .env — automatically picked up when base_url
+  #   points to ollama.com.
+  #
   # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
   provider: "auto"
   
@@ -789,6 +795,27 @@ display:
   #
   skin: default
 
+# =============================================================================
+# Model Aliases — short names for /model command
+# =============================================================================
+# Map short aliases to exact (model, provider, base_url) tuples.
+# Used by /model tab completion and resolve_alias().
+# Aliases are checked BEFORE the models.dev catalog, so they can route
+# to endpoints not in the catalog (e.g. Ollama Cloud, local servers).
+#
+# model_aliases:
+#   opus:
+#     model: claude-opus-4-6
+#     provider: anthropic
+#   qwen:
+#     model: "qwen3.5:397b"
+#     provider: custom
+#     base_url: "https://ollama.com/v1"
+#   glm:
+#     model: glm-4.7
+#     provider: custom
+#     base_url: "https://ollama.com/v1"
+
 # =============================================================================
 # Privacy
 # =============================================================================
diff --git a/cli.py b/cli.py
index 5802a31e2..096ab9ec4 100644
--- a/cli.py
+++ b/cli.py
@@ -3606,14 +3606,19 @@ class HermesCLI:
             _cprint(f"  ✗ {result.error_message}")
             return
 
-        # Apply to CLI state
+        # Apply to CLI state.
+        # Update requested_provider so _ensure_runtime_credentials() doesn't
+        # overwrite the switch on the next turn (it re-resolves from this).
         old_model = self.model
         self.model = result.new_model
         self.provider = result.target_provider
+        self.requested_provider = result.target_provider
         if result.api_key:
             self.api_key = result.api_key
+            self._explicit_api_key = result.api_key
         if result.base_url:
             self.base_url = result.base_url
+            self._explicit_base_url = result.base_url
         if result.api_mode:
             self.api_mode = result.api_mode
 
@@ -3630,6 +3635,15 @@ class HermesCLI:
             except Exception as exc:
                 _cprint(f"  ⚠ Agent swap failed ({exc}); change applied to next session.")
 
+        # Store a note to prepend to the next user message so the model
+        # knows a switch occurred (avoids injecting system messages mid-history
+        # which breaks providers and prompt caching).
+        self._pending_model_switch_note = (
+            f"[Note: model was just switched from {old_model} to {result.new_model} "
+            f"via {result.provider_label or result.target_provider}. "
+            f"Adjust your self-identification accordingly.]"
+        )
+
         # Display confirmation with full metadata
         provider_label = result.provider_label or result.target_provider
         _cprint(f"  ✓ Model switched: {result.new_model}")
@@ -6347,6 +6361,11 @@ class HermesCLI:
             def run_agent():
                 nonlocal result
                 agent_message = _voice_prefix + message if _voice_prefix else message
+                # Prepend pending model switch note so the model knows about the switch
+                _msn = getattr(self, '_pending_model_switch_note', None)
+                if _msn:
+                    agent_message = _msn + "\n\n" + agent_message
+                    self._pending_model_switch_note = None
                 try:
                     result = self.agent.run_conversation(
                         user_message=agent_message,
diff --git a/gateway/run.py b/gateway/run.py
index 4411b5c68..070b77e18 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -3393,6 +3393,16 @@ class GatewayRunner:
             except Exception as exc:
                 logger.warning("In-place model switch failed for cached agent: %s", exc)
 
+        # Store a note to prepend to the next user message so the model
+        # knows about the switch (avoids system messages mid-history).
+        if not hasattr(self, "_pending_model_notes"):
+            self._pending_model_notes = {}
+        self._pending_model_notes[session_key] = (
+            f"[Note: model was just switched from {current_model} to {result.new_model} "
+            f"via {result.provider_label or result.target_provider}. "
+            f"Adjust your self-identification accordingly.]"
+        )
+
         # Store session override so next agent creation uses the new model
         if not hasattr(self, "_session_model_overrides"):
             self._session_model_overrides = {}
@@ -6440,6 +6450,12 @@ class GatewayRunner:
                 except Exception as _e:
                     logger.error("Failed to send approval request: %s", _e)
 
+            # Prepend pending model switch note so the model knows about the switch
+            _pending_notes = getattr(self, '_pending_model_notes', {})
+            _msn = _pending_notes.pop(session_key, None) if session_key else None
+            if _msn:
+                message = _msn + "\n\n" + message
+
             _approval_session_key = session_key or ""
             _approval_session_token = set_current_session_key(_approval_session_key)
             register_gateway_notify(_approval_session_key, _approval_notify_sync)
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 782d52250..e0c769d19 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -745,6 +745,39 @@ class SlashCommandCompleter(Completer):
             )
             count += 1
 
+    def _model_completions(self, sub_text: str, sub_lower: str):
+        """Yield /model completions: config direct aliases, then built-ins.
+        ``sub_text`` is the typed fragment; ``sub_lower`` its lowercase form."""
+        seen = set()
+        # Config-based direct aliases (preferred — include provider info)
+        try:
+            # Read the alias tables through the module *after* the lazy load:
+            # a from-import made before it could hold a stale, empty dict.
+            import hermes_cli.model_switch as _ms
+            _ms._ensure_direct_aliases()
+            for name, da in _ms.DIRECT_ALIASES.items():
+                if name.startswith(sub_lower) and name != sub_lower:
+                    seen.add(name)
+                    yield Completion(
+                        name,
+                        start_position=-len(sub_text),
+                        display=name,
+                        display_meta=f"{da.model} ({da.provider})",
+                    )
+            # Built-in catalog aliases not already covered
+            for name in sorted(_ms.MODEL_ALIASES):
+                if name in seen or name == sub_lower or not name.startswith(sub_lower):
+                    continue
+                identity = _ms.MODEL_ALIASES[name]
+                yield Completion(
+                    name,
+                    start_position=-len(sub_text),
+                    display=name,
+                    display_meta=f"{identity.vendor}/{identity.family}",
+                )
+        except Exception:
+            pass  # best-effort: swallow errors rather than break completion
+
     def get_completions(self, document, complete_event):
         text = document.text_before_cursor
         if not text.startswith("/"):
@@ -766,6 +799,11 @@ class SlashCommandCompleter(Completer):
             sub_text = parts[1] if len(parts) > 1 else ""
             sub_lower = sub_text.lower()
 
+            # Dynamic model alias completions for /model
+            if " " not in sub_text and base_cmd == "/model":
+                yield from self._model_completions(sub_text, sub_lower)
+                return
+
             # Static subcommand completions
             if " " not in sub_text and base_cmd in SUBCOMMANDS:
                 for sub in SUBCOMMANDS[base_cmd]:
diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py
index 9534f3765..dc9ca2eec 100644
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@@ -114,6 +114,71 @@ MODEL_ALIASES: dict[str, ModelIdentity] = {
 }
 
 
+# ---------------------------------------------------------------------------
+# Direct aliases — exact model+provider+base_url for endpoints that aren't
+# in the models.dev catalog (e.g. Ollama Cloud, local servers).
+# Checked BEFORE catalog resolution.  Format:
+#   alias -> (model_id, provider, base_url)
+# These can also be loaded from config.yaml ``model_aliases:`` section.
+# ---------------------------------------------------------------------------
+
+class DirectAlias(NamedTuple):
+    """Exact model mapping that bypasses catalog resolution."""
+    model: str  # model ID handed to the endpoint, e.g. "qwen3.5:397b"
+    provider: str  # provider name; the config loader defaults it to "custom"
+    base_url: str  # endpoint URL; empty string when the alias sets none
+
+
+# Built-in direct aliases (can be extended via config.yaml model_aliases:)
+_BUILTIN_DIRECT_ALIASES: dict[str, DirectAlias] = {}
+
+# Merged dict (builtins + user config); filled lazily by _ensure_direct_aliases()
+DIRECT_ALIASES: dict[str, DirectAlias] = {}
+
+
+def _load_direct_aliases() -> dict[str, DirectAlias]:
+    """Return built-in direct aliases merged with config.yaml ``model_aliases:``.
+
+    Best-effort: an unreadable config yields the built-ins only.  Entries with
+    a non-string name, a non-mapping value or no ``model`` are skipped, and
+    fields are coerced to ``str`` so ``.lower()`` lookups cannot fail.  Format::
+
+        model_aliases:
+          qwen:
+            model: "qwen3.5:397b"
+            provider: custom
+            base_url: "https://ollama.com/v1"
+    """
+    merged = dict(_BUILTIN_DIRECT_ALIASES)
+    try:
+        from hermes_cli.config import load_config
+        user_aliases = load_config().get("model_aliases")
+    except Exception:
+        return merged
+    if not isinstance(user_aliases, dict):
+        return merged
+    for name, entry in user_aliases.items():
+        if not isinstance(name, str) or not isinstance(entry, dict):
+            continue
+        # ``or`` also maps an explicit YAML null to the default.
+        model = str(entry.get("model") or "").strip()
+        if not model:
+            continue
+        merged[name.strip().lower()] = DirectAlias(
+            model=model,
+            provider=str(entry.get("provider") or "custom"),
+            base_url=str(entry.get("base_url") or ""),
+        )
+    return merged
+
+
+def _ensure_direct_aliases() -> None:
+    """Lazy-load direct aliases into ``DIRECT_ALIASES`` while it is empty."""
+    # Update in place (no rebinding) so ``from ... import DIRECT_ALIASES`` stays live.
+    if not DIRECT_ALIASES:
+        DIRECT_ALIASES.update(_load_direct_aliases())
+
+
 # ---------------------------------------------------------------------------
 # Result dataclasses
 # ---------------------------------------------------------------------------
@@ -211,6 +276,20 @@ def resolve_alias(
         exist or no matching model is available.
     """
     key = raw_input.strip().lower()
+
+    # Check direct aliases first (exact model+provider+base_url mappings)
+    _ensure_direct_aliases()
+    direct = DIRECT_ALIASES.get(key)
+    if direct is not None:
+        return (direct.provider, direct.model, key)
+
+    # Reverse lookup: match by model ID so full names (e.g. "kimi-k2.5",
+    # "glm-4.7") route through direct aliases instead of falling through
+    # to the catalog/OpenRouter.
+    for alias_name, da in DIRECT_ALIASES.items():
+        if da.model.lower() == key:
+            return (da.provider, da.model, alias_name)
+
     identity = MODEL_ALIASES.get(key)
     if identity is None:
         return None
@@ -487,6 +566,15 @@ def switch_model(
         except Exception:
             pass
 
+    # --- Direct alias override: use exact base_url from the alias if set ---
+    if resolved_alias:
+        _ensure_direct_aliases()
+        _da = DIRECT_ALIASES.get(resolved_alias)
+        if _da is not None and _da.base_url:
+            base_url = _da.base_url
+            if not api_key:
+                api_key = "no-key-required"
+
     # --- Normalize model name for target provider ---
     new_model = normalize_model_for_provider(new_model, target_provider)
 
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index 0ed4c826c..b14807231 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -377,9 +377,13 @@ def _resolve_openrouter_runtime(
         ]
     else:
         # Custom endpoint: use api_key from config when using config base_url (#1760).
+        # When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's
+        # the canonical env var for ollama.com authentication.
+        _is_ollama_url = "ollama.com" in base_url.lower()
         api_key_candidates = [
             explicit_api_key,
             (cfg_api_key if use_config_base_url else ""),
+            (os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
             os.getenv("OPENAI_API_KEY"),
             os.getenv("OPENROUTER_API_KEY"),
         ]
diff --git a/run_agent.py b/run_agent.py
index 7235f9f6c..7aa4a33f4 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -4794,8 +4794,19 @@ class AIAgent:
         # access for Codex providers.
         try:
             from agent.auxiliary_client import resolve_provider_client
+            # Pass base_url and api_key from fallback config so custom
+            # endpoints (e.g. Ollama Cloud) resolve correctly instead of
+            # falling through to OpenRouter defaults.
+            fb_base_url_hint = (fb.get("base_url") or "").strip() or None
+            fb_api_key_hint = (fb.get("api_key") or "").strip() or None
+            # For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env
+            # when no explicit key is in the fallback config.
+            if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint:
+                fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None
             fb_client, _ = resolve_provider_client(
-                fb_provider, model=fb_model, raw_codex=True)
+                fb_provider, model=fb_model, raw_codex=True,
+                explicit_base_url=fb_base_url_hint,
+                explicit_api_key=fb_api_key_hint)
             if fb_client is None:
                 logging.warning(
                     "Fallback to %s failed: provider not configured",
diff --git a/tests/test_ollama_cloud_auth.py b/tests/test_ollama_cloud_auth.py
new file mode 100644
index 000000000..7a5dbf6ae
--- /dev/null
+++ b/tests/test_ollama_cloud_auth.py
@@ -0,0 +1,657 @@
+"""Tests for Ollama Cloud authentication and /model switch fixes.
+
+Covers:
+- OLLAMA_API_KEY resolution for custom endpoints pointing to ollama.com
+- Fallback provider passing base_url/api_key to resolve_provider_client
+- /model command updating requested_provider for session persistence
+- Direct alias resolution from config.yaml model_aliases
+- Reverse lookup: full model names match direct aliases
+- /model tab completion for model aliases
+"""
+
+import os
+import pytest
+from unittest.mock import patch, MagicMock
+
+
+# ---------------------------------------------------------------------------
+# OLLAMA_API_KEY credential resolution
+# ---------------------------------------------------------------------------
+
+class TestOllamaCloudCredentials:
+    """runtime_provider should use OLLAMA_API_KEY for ollama.com endpoints."""
+
+    def test_ollama_api_key_used_for_ollama_endpoint(self, monkeypatch):
+        """With an ollama.com base_url and no other keys set, OLLAMA_API_KEY is used."""
+        monkeypatch.setenv("OLLAMA_API_KEY", "test-ollama-key-12345")
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+
+        # Mock config to return custom provider with ollama base_url
+        mock_config = {
+            "model": {
+                "default": "qwen3.5:397b",
+                "provider": "custom",
+                "base_url": "https://ollama.com/v1",
+            }
+        }
+        monkeypatch.setattr(
+            "hermes_cli.runtime_provider._get_model_config",
+            lambda: mock_config.get("model", {}),
+        )
+
+        from hermes_cli.runtime_provider import resolve_runtime_provider
+        runtime = resolve_runtime_provider(requested="custom")
+
+        assert runtime["base_url"] == "https://ollama.com/v1"
+        assert runtime["api_key"] == "test-ollama-key-12345"
+        assert runtime["provider"] == "custom"
+
+    def test_ollama_key_not_used_for_non_ollama_endpoint(self, monkeypatch):
+        """OLLAMA_API_KEY must not be returned for a base_url without ollama.com."""
+        monkeypatch.setenv("OLLAMA_API_KEY", "test-ollama-key")
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+
+        mock_config = {
+            "model": {
+                "provider": "custom",
+                "base_url": "http://localhost:11434/v1",
+            }
+        }
+        monkeypatch.setattr(
+            "hermes_cli.runtime_provider._get_model_config",
+            lambda: mock_config.get("model", {}),
+        )
+
+        from hermes_cli.runtime_provider import resolve_runtime_provider
+        runtime = resolve_runtime_provider(requested="custom")
+
+        # Only the negative is asserted: whatever key is chosen, it is not Ollama's
+        assert runtime["api_key"] != "test-ollama-key"
+
+
+# ---------------------------------------------------------------------------
+# Direct alias resolution
+# ---------------------------------------------------------------------------
+
+class TestDirectAliases:
+    """Direct alias loading (config ``model_aliases``) and resolve_alias() lookups."""
+
+    def test_direct_alias_loaded_from_config(self, monkeypatch):
+        """A model_aliases entry becomes a DirectAlias with model/provider/base_url."""
+        mock_config = {
+            "model_aliases": {
+                "mymodel": {
+                    "model": "custom-model:latest",
+                    "provider": "custom",
+                    "base_url": "https://example.com/v1",
+                }
+            }
+        }
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: mock_config,
+        )
+
+        from hermes_cli.model_switch import _load_direct_aliases
+        aliases = _load_direct_aliases()
+
+        assert "mymodel" in aliases
+        assert aliases["mymodel"].model == "custom-model:latest"
+        assert aliases["mymodel"].provider == "custom"
+        assert aliases["mymodel"].base_url == "https://example.com/v1"
+
+    def test_direct_alias_resolved_before_catalog(self, monkeypatch):
+        """An alias-name hit returns (provider, model, alias) from DIRECT_ALIASES."""
+        from hermes_cli.model_switch import DirectAlias, resolve_alias
+        import hermes_cli.model_switch as ms
+
+        test_aliases = {
+            "glm": DirectAlias("glm-4.7", "custom", "https://ollama.com/v1"),
+        }
+        monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases)
+
+        result = resolve_alias("glm", "openrouter")
+        assert result is not None
+        provider, model, alias = result
+        assert model == "glm-4.7"
+        assert provider == "custom"
+        assert alias == "glm"
+
+    def test_reverse_lookup_by_model_id(self, monkeypatch):
+        """Typing the full model ID (e.g. 'kimi-k2.5') resolves to its alias entry."""
+        from hermes_cli.model_switch import DirectAlias, resolve_alias
+        import hermes_cli.model_switch as ms
+
+        test_aliases = {
+            "kimi": DirectAlias("kimi-k2.5", "custom", "https://ollama.com/v1"),
+        }
+        monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases)
+
+        # The input is a model ID, not an alias name; the alias name is returned
+        result = resolve_alias("kimi-k2.5", "openrouter")
+        assert result is not None
+        provider, model, alias = result
+        assert model == "kimi-k2.5"
+        assert provider == "custom"
+        assert alias == "kimi"
+
+    def test_reverse_lookup_case_insensitive(self, monkeypatch):
+        """Reverse lookup ignores case but returns the model ID as configured."""
+        from hermes_cli.model_switch import DirectAlias, resolve_alias
+        import hermes_cli.model_switch as ms
+
+        test_aliases = {
+            "glm": DirectAlias("GLM-4.7", "custom", "https://ollama.com/v1"),
+        }
+        monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases)
+
+        result = resolve_alias("glm-4.7", "openrouter")
+        assert result is not None
+        assert result[1] == "GLM-4.7"
+
+
+# ---------------------------------------------------------------------------
+# /model command persistence
+# ---------------------------------------------------------------------------
+
+class TestModelSwitchPersistence:
+    """ModelSwitchResult exposes the fields the CLI copies after a /model switch."""
+
+    def test_model_switch_result_fields(self):
+        """Constructed result round-trips its fields; CLI state itself is not exercised."""
+        from hermes_cli.model_switch import ModelSwitchResult
+
+        result = ModelSwitchResult(
+            success=True,
+            new_model="claude-opus-4-6",
+            target_provider="anthropic",
+            provider_changed=True,
+            api_key="test-key",
+            base_url="https://api.anthropic.com",
+            api_mode="anthropic_messages",
+        )
+
+        assert result.success
+        assert result.new_model == "claude-opus-4-6"
+        assert result.target_provider == "anthropic"
+        assert result.api_key == "test-key"
+        assert result.base_url == "https://api.anthropic.com"
+
+
+# ---------------------------------------------------------------------------
+# /model tab completion
+# ---------------------------------------------------------------------------
+
+class TestModelTabCompletion:
+    """SlashCommandCompleter._model_completions() output for /model."""
+
+    def test_model_completions_yields_direct_aliases(self, monkeypatch):
+        """An empty prefix yields every patched direct alias."""
+        from hermes_cli.commands import SlashCommandCompleter
+        from hermes_cli.model_switch import DirectAlias
+        import hermes_cli.model_switch as ms
+
+        test_aliases = {
+            "opus": DirectAlias("claude-opus-4-6", "anthropic", ""),
+            "qwen": DirectAlias("qwen3.5:397b", "custom", "https://ollama.com/v1"),
+        }
+        monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases)
+
+        completer = SlashCommandCompleter()
+        completions = list(completer._model_completions("", ""))
+
+        names = [c.text for c in completions]
+        assert "opus" in names
+        assert "qwen" in names
+
+    def test_model_completions_filters_by_prefix(self, monkeypatch):
+        """Only aliases starting with the typed prefix are offered."""
+        from hermes_cli.commands import SlashCommandCompleter
+        from hermes_cli.model_switch import DirectAlias
+        import hermes_cli.model_switch as ms
+
+        test_aliases = {
+            "opus": DirectAlias("claude-opus-4-6", "anthropic", ""),
+            "qwen": DirectAlias("qwen3.5:397b", "custom", "https://ollama.com/v1"),
+        }
+        monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases)
+
+        completer = SlashCommandCompleter()
+        completions = list(completer._model_completions("o", "o"))
+
+        names = [c.text for c in completions]
+        assert "opus" in names
+        assert "qwen" not in names
+
+    def test_model_completions_shows_metadata(self, monkeypatch):
+        """display_meta carries the alias's model ID and provider."""
+        from hermes_cli.commands import SlashCommandCompleter
+        from hermes_cli.model_switch import DirectAlias
+        import hermes_cli.model_switch as ms
+
+        test_aliases = {
+            "glm": DirectAlias("glm-4.7", "custom", "https://ollama.com/v1"),
+        }
+        monkeypatch.setattr(ms, "DIRECT_ALIASES", test_aliases)
+
+        completer = SlashCommandCompleter()
+        completions = list(completer._model_completions("g", "g"))
+
+        assert len(completions) >= 1
+        glm_comp = [c for c in completions if c.text == "glm"][0]
+        meta_str = str(glm_comp.display_meta)
+        assert "glm-4.7" in meta_str
+        assert "custom" in meta_str
+
+
+# ---------------------------------------------------------------------------
+# Fallback base_url passthrough
+# ---------------------------------------------------------------------------
+
+class TestFallbackBaseUrlPassthrough:
+    """Fallback config shape and OLLAMA_API_KEY selection (logic mirrored inline)."""
+
+    def test_fallback_config_has_base_url(self):
+        """A fallback dict can carry base_url (plain dict check, no production call)."""
+        # Contract only: nothing from run_agent is imported or executed here
+        fb = {
+            "provider": "custom",
+            "model": "qwen3.5:397b",
+            "base_url": "https://ollama.com/v1",
+        }
+        assert fb.get("base_url") == "https://ollama.com/v1"
+
+    def test_ollama_key_lookup_for_fallback(self, monkeypatch):
+        """Inline copy of the fallback key lookup: ollama.com URL + no key -> env var."""
+        monkeypatch.setenv("OLLAMA_API_KEY", "fb-ollama-key")
+
+        fb = {
+            "provider": "custom",
+            "model": "qwen3.5:397b",
+            "base_url": "https://ollama.com/v1",
+        }
+
+        fb_base_url_hint = (fb.get("base_url") or "").strip() or None
+        fb_api_key_hint = (fb.get("api_key") or "").strip() or None
+
+        if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint:
+            fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None
+
+        assert fb_api_key_hint == "fb-ollama-key"
+        assert fb_base_url_hint == "https://ollama.com/v1"
+
+
+# ---------------------------------------------------------------------------
+# Edge cases: _load_direct_aliases
+# ---------------------------------------------------------------------------
+
+class TestLoadDirectAliasesEdgeCases:
+    """Edge cases for _load_direct_aliases parsing."""
+
+    def test_empty_model_aliases_config(self, monkeypatch):
+        """Empty model_aliases dict returns exactly the builtins."""
+        mock_config = {"model_aliases": {}}
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: mock_config,
+        )
+
+        from hermes_cli.model_switch import _BUILTIN_DIRECT_ALIASES, _load_direct_aliases
+        aliases = _load_direct_aliases()
+        assert aliases == _BUILTIN_DIRECT_ALIASES
+
+    def test_model_aliases_not_a_dict(self, monkeypatch):
+        """Non-dict model_aliases value is ignored, leaving exactly the builtins."""
+        mock_config = {"model_aliases": "bad-string-value"}
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: mock_config,
+        )
+
+        from hermes_cli.model_switch import _BUILTIN_DIRECT_ALIASES, _load_direct_aliases
+        aliases = _load_direct_aliases()
+        assert aliases == _BUILTIN_DIRECT_ALIASES
+
+    def test_model_aliases_none_value(self, monkeypatch):
+        """model_aliases: null in config leaves exactly the builtins."""
+        mock_config = {"model_aliases": None}
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: mock_config,
+        )
+
+        from hermes_cli.model_switch import _BUILTIN_DIRECT_ALIASES, _load_direct_aliases
+        aliases = _load_direct_aliases()
+        assert aliases == _BUILTIN_DIRECT_ALIASES
+
+    def test_malformed_entry_without_model_key(self, monkeypatch):
+        """Entries missing 'model' key are skipped; valid siblings still load."""
+        mock_config = {
+            "model_aliases": {
+                "bad_entry": {
+                    "provider": "custom",
+                    "base_url": "https://example.com/v1",
+                },
+                "good_entry": {
+                    "model": "valid-model",
+                    "provider": "custom",
+                },
+            }
+        }
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: mock_config,
+        )
+
+        from hermes_cli.model_switch import _load_direct_aliases
+        aliases = _load_direct_aliases()
+        assert "bad_entry" not in aliases
+        assert "good_entry" in aliases
+
+    def test_malformed_entry_non_dict_value(self, monkeypatch):
+        """Non-dict entry values are skipped; valid siblings still load."""
+        mock_config = {
+            "model_aliases": {
+                "string_entry": "just-a-string",
+                "none_entry": None,
+                "list_entry": ["a", "b"],
+                "good": {"model": "real-model", "provider": "custom"},
+            }
+        }
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: mock_config,
+        )
+
+        from hermes_cli.model_switch import _load_direct_aliases
+        aliases = _load_direct_aliases()
+        assert "string_entry" not in aliases
+        assert "none_entry" not in aliases
+        assert "list_entry" not in aliases
+        assert "good" in aliases
+
+    def test_load_config_exception_returns_builtins(self, monkeypatch):
+        """If load_config raises, _load_direct_aliases returns exactly the builtins."""
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: (_ for _ in ()).throw(RuntimeError("config broken")),
+        )
+
+        from hermes_cli.model_switch import _BUILTIN_DIRECT_ALIASES, _load_direct_aliases
+        aliases = _load_direct_aliases()
+        assert aliases == _BUILTIN_DIRECT_ALIASES
+
+    def test_alias_name_normalized_lowercase(self, monkeypatch):
+        """Alias names are lowercased and stripped."""
+        mock_config = {
+            "model_aliases": {
+                "  MyModel  ": {
+                    "model": "my-model:latest",
+                    "provider": "custom",
+                }
+            }
+        }
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: mock_config,
+        )
+
+        from hermes_cli.model_switch import _load_direct_aliases
+        aliases = _load_direct_aliases()
+        assert "mymodel" in aliases
+        assert "  MyModel  " not in aliases
+
+    def test_empty_model_string_skipped(self, monkeypatch):
+        """Entries with empty model string are skipped."""
+        mock_config = {
+            "model_aliases": {
+                "empty": {"model": "", "provider": "custom"},
+                "good": {"model": "real", "provider": "custom"},
+            }
+        }
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: mock_config,
+        )
+
+        from hermes_cli.model_switch import _load_direct_aliases
+        aliases = _load_direct_aliases()
+        assert "empty" not in aliases
+        assert "good" in aliases
+
+
+# ---------------------------------------------------------------------------
+# _ensure_direct_aliases idempotency
+# ---------------------------------------------------------------------------
+
+class TestEnsureDirectAliases:
+    """_ensure_direct_aliases: fills an empty table, leaves a populated one alone."""
+
+    def test_ensure_populates_on_first_call(self, monkeypatch):
+        """Starting from an empty DIRECT_ALIASES, the configured alias appears."""
+        import hermes_cli.model_switch as ms
+
+        alias_entry = {"model": "test-model", "provider": "custom"}
+        fake_config = {"model_aliases": {"test": alias_entry}}
+
+        def _fake_load_config():
+            return fake_config
+
+        monkeypatch.setattr("hermes_cli.config.load_config", _fake_load_config)
+        monkeypatch.setattr(ms, "DIRECT_ALIASES", {})
+
+        ms._ensure_direct_aliases()
+        assert "test" in ms.DIRECT_ALIASES
+
+    def test_ensure_no_reload_when_populated(self, monkeypatch):
+        """With DIRECT_ALIASES pre-filled, the loader is never invoked."""
+        import hermes_cli.model_switch as ms
+        from hermes_cli.model_switch import DirectAlias
+
+        preloaded = {"pre": DirectAlias("pre-model", "custom", "")}
+        monkeypatch.setattr(ms, "DIRECT_ALIASES", preloaded)
+
+        real_loader = ms._load_direct_aliases
+        loader_calls = []
+
+        def _recording_loader():
+            loader_calls.append(1)
+            return real_loader()
+
+        monkeypatch.setattr(ms, "_load_direct_aliases", _recording_loader)
+        ms._ensure_direct_aliases()
+        assert loader_calls == []
+        assert "pre" in ms.DIRECT_ALIASES
+
+
+# ---------------------------------------------------------------------------
+# resolve_alias: fallthrough and edge cases
+# ---------------------------------------------------------------------------
+
+class TestResolveAliasEdgeCases:
+    """resolve_alias behaviour for unknown names and padded input."""
+
+    def test_unknown_alias_returns_none(self, monkeypatch):
+        """A made-up name with no direct alias must resolve to None."""
+        import hermes_cli.model_switch as ms
+
+        monkeypatch.setattr(ms, "DIRECT_ALIASES", {})
+        resolved = ms.resolve_alias("nonexistent_model_xyz", "openrouter")
+
+        assert resolved is None
+
+    def test_whitespace_input_handled(self, monkeypatch):
+        """Surrounding spaces in the query still match the direct alias."""
+        import hermes_cli.model_switch as ms
+        from hermes_cli.model_switch import DirectAlias
+
+        target = DirectAlias("my-model", "custom", "https://example.com")
+        monkeypatch.setattr(ms, "DIRECT_ALIASES", {"myalias": target})
+
+        resolved = ms.resolve_alias("  myalias  ", "openrouter")
+        assert resolved is not None
+        # Index 1 of the resolved tuple is expected to be the model id.
+        assert resolved[1] == "my-model"
+
+
+# ---------------------------------------------------------------------------
+# switch_model: direct alias base_url override
+# ---------------------------------------------------------------------------
+
+class TestSwitchModelDirectAliasOverride:
+    """switch_model should use base_url from direct alias."""
+
+    @staticmethod
+    def _stub_collaborators(monkeypatch, ms, aliases, resolved):
+        """Install *aliases* and stub what these tests replace around switch_model.
+
+        ``resolved`` is the tuple the stubbed resolve_alias returns. Every stub
+        accepts ``*a, **kw`` so the tests do not depend on whether switch_model
+        passes its arguments positionally or by keyword.
+        """
+        runtime = {"api_key": "", "base_url": "", "api_mode": "openai_compat", "provider": "custom"}
+        verdict = {"accepted": True, "persist": True, "recognized": True, "message": None}
+        monkeypatch.setattr(ms, "DIRECT_ALIASES", aliases)
+        monkeypatch.setattr(ms, "resolve_alias", lambda *a, **kw: resolved)
+        monkeypatch.setattr(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            lambda *a, **kw: dict(runtime),
+        )
+        monkeypatch.setattr(
+            "hermes_cli.models.validate_requested_model",
+            lambda *a, **kw: dict(verdict),
+        )
+        monkeypatch.setattr(
+            "hermes_cli.models.opencode_model_api_mode",
+            lambda *a, **kw: "openai_compat",
+        )
+
+    def test_switch_model_uses_alias_base_url(self, monkeypatch):
+        """When resolved alias has base_url, switch_model should use it."""
+        from hermes_cli.model_switch import DirectAlias
+        import hermes_cli.model_switch as ms
+
+        alias = DirectAlias("qwen3.5:397b", "custom", "https://ollama.com/v1")
+        resolved = ("custom", "qwen3.5:397b", "qwen")
+        self._stub_collaborators(monkeypatch, ms, {"qwen": alias}, resolved)
+
+        result = ms.switch_model("qwen", "openrouter", "old-model")
+        assert result.success
+        assert result.base_url == "https://ollama.com/v1"
+        assert result.new_model == "qwen3.5:397b"
+
+    def test_switch_model_alias_no_api_key_gets_default(self, monkeypatch):
+        """When alias has base_url but no api_key, 'no-key-required' is set."""
+        from hermes_cli.model_switch import DirectAlias
+        import hermes_cli.model_switch as ms
+
+        alias = DirectAlias("local-model", "custom", "http://localhost:11434/v1")
+        resolved = ("custom", "local-model", "local")
+        self._stub_collaborators(monkeypatch, ms, {"local": alias}, resolved)
+
+        result = ms.switch_model("local", "openrouter", "old-model")
+        assert result.success
+        assert result.api_key == "no-key-required"
+        assert result.base_url == "http://localhost:11434/v1"
+
+
+# ---------------------------------------------------------------------------
+# CLI state update: requested_provider persistence
+# ---------------------------------------------------------------------------
+
+class TestCLIStateUpdate:
+    """ModelSwitchResult construction: explicit provider_label, omitted fields."""
+
+    def test_model_switch_result_has_provider_label(self):
+        """provider_label passed to the constructor is readable afterwards."""
+        from hermes_cli.model_switch import ModelSwitchResult
+
+        fields = {
+            "success": True,
+            "new_model": "qwen3.5:397b",
+            "target_provider": "custom",
+            "provider_changed": True,
+            "api_key": "key",
+            "base_url": "https://ollama.com/v1",
+            "api_mode": "openai_compat",
+            "provider_label": "Ollama Cloud",
+        }
+        result = ModelSwitchResult(**fields)
+        assert result.provider_label == "Ollama Cloud"
+
+    def test_model_switch_result_defaults(self):
+        """A failure result keeps its message; api_key/base_url stay unset."""
+        from hermes_cli.model_switch import ModelSwitchResult
+
+        failure = ModelSwitchResult(
+            success=False, new_model="", target_provider="",
+            provider_changed=False, error_message="Something failed",
+        )
+
+        assert not failure.success
+        assert failure.error_message == "Something failed"
+        # Either None or "" counts as "unset" for the omitted fields.
+        for unset in (failure.api_key, failure.base_url):
+            assert unset is None or unset == ""
+
+
+# ---------------------------------------------------------------------------
+# Fallback: OLLAMA_API_KEY edge cases
+# ---------------------------------------------------------------------------
+
+class TestFallbackEdgeCases:
+    """Edge cases for fallback OLLAMA_API_KEY logic."""
+
+    def test_ollama_key_not_injected_for_localhost(self, monkeypatch):
+        """With a localhost base_url the env OLLAMA_API_KEY is left unused."""
+        monkeypatch.setenv("OLLAMA_API_KEY", "should-not-use")
+
+        fb = dict(
+            provider="custom",
+            model="local-model",
+            base_url="http://localhost:11434/v1",
+        )
+
+        # The check is evaluated inline here; no production function is called.
+        base_url = (fb.get("base_url") or "").strip() or None
+        api_key = (fb.get("api_key") or "").strip() or None
+        targets_ollama_cloud = bool(base_url) and "ollama.com" in base_url.lower()
+        if targets_ollama_cloud and not api_key:
+            api_key = os.getenv("OLLAMA_API_KEY") or None
+        assert api_key is None
+
+    def test_explicit_api_key_not_overridden_by_ollama_key(self, monkeypatch):
+        """An api_key set in the fallback entry wins over env OLLAMA_API_KEY."""
+        monkeypatch.setenv("OLLAMA_API_KEY", "env-key")
+
+        fb = dict(
+            provider="custom",
+            model="qwen3.5:397b",
+            base_url="https://ollama.com/v1",
+            api_key="explicit-key",
+        )
+
+        # The check is evaluated inline here; no production function is called.
+        base_url = (fb.get("base_url") or "").strip() or None
+        api_key = (fb.get("api_key") or "").strip() or None
+        targets_ollama_cloud = bool(base_url) and "ollama.com" in base_url.lower()
+        if targets_ollama_cloud and not api_key:
+            api_key = os.getenv("OLLAMA_API_KEY") or None
+        assert api_key == "explicit-key"
+
+    def test_no_base_url_in_fallback(self, monkeypatch):
+        """A fallback entry without base_url yields None for both hints, no error."""
+        monkeypatch.setenv("OLLAMA_API_KEY", "some-key")
+
+        fb = {"provider": "openrouter", "model": "some-model"}
+
+        fb_base_url_hint = (fb.get("base_url") or "").strip() or None
+        fb_api_key_hint = (fb.get("api_key") or "").strip() or None
+
+        if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint:
+            fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None
+
+        assert fb_base_url_hint is None
+        assert fb_api_key_hint is None