diff --git a/cli.py b/cli.py
index 0e97c480d57..8910e2d8c5d 100644
--- a/cli.py
+++ b/cli.py
@@ -13094,6 +13094,16 @@ class HermesCLI:
             _welcome_color = "#FFF8DC"
         self._console_print(f"[{_welcome_color}]{_welcome_text}[/]")
 
+        # Warm the /model picker's provider-models cache off-thread during this
+        # idle window (banner shown, user about to type). The no-args picker
+        # otherwise blocks ~1-2s on serial /v1/models fetches the first time
+        # it's opened in a session. Fire-and-forget, guarded once-per-process.
+        try:
+            from hermes_cli.model_switch import prewarm_picker_cache_async
+            prewarm_picker_cache_async()
+        except Exception:
+            pass
+
         # Redaction opt-out warning (#17691): ON by default, loud when off.
         # The redactor snapshots its state at import time so any toggle now
         # won't affect the running process — we just want the operator to
diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py
index 0d1f6fa44d6..c4e76b0d5bc 100644
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@@ -1117,6 +1117,62 @@ def switch_model(
 # Authenticated providers listing (for /model no-args display)
 # ---------------------------------------------------------------------------
 
+# Process-level guard so the picker prewarm thread is spawned at most once per
+# process — mirrors run_agent's _openrouter_prewarm_done. Without a guard a
+# long-lived process (or repeated triggers) would leak one OS thread per call.
+import threading as _threading  # noqa: E402
+
+_picker_prewarm_done = _threading.Event()
+# Serializes the check-and-set on the Event so concurrent callers cannot both
+# observe it unset and each spawn a thread.
+_picker_prewarm_lock = _threading.Lock()
+
+
+def prewarm_picker_cache_async() -> Optional["_threading.Thread"]:
+    """Warm the provider-models disk cache in a background daemon thread.
+
+    The no-args ``/model`` picker calls ``list_authenticated_providers()``,
+    which fetches each authenticated provider's live ``/v1/models`` list on a
+    cold/stale cache. Those serial HTTP round-trips block the first ``/model``
+    open in a session (or any open after the 1h cache TTL) for ~1-2s.
+
+    This runs that same call once off-thread during idle session time, which
+    populates ``provider_models_cache.json`` so the picker renders in ~100ms.
+
+    Fire-and-forget. The Event guard is tested and set under a lock, so at
+    most one thread is spawned per process even with concurrent callers.
+    Worker errors are logged at debug level and swallowed. Returns the
+    spawned thread (for tests) or None if a prewarm was already started.
+    """
+    with _picker_prewarm_lock:
+        if _picker_prewarm_done.is_set():
+            return None
+        _picker_prewarm_done.set()
+
+    def _warm() -> None:
+        try:
+            from hermes_cli.inventory import load_picker_context
+
+            ctx = load_picker_context()
+            # Result is discarded: the call's side effect of populating
+            # provider_models_cache.json per authed provider is the point.
+            list_authenticated_providers(
+                current_provider=ctx.current_provider,
+                current_base_url=ctx.current_base_url,
+                current_model=ctx.current_model,
+                user_providers=ctx.user_providers,
+                custom_providers=ctx.custom_providers,
+                max_models=50,
+            )
+        except Exception:
+            # Best-effort warmup — never surface errors into the session.
+            logger.debug("picker cache prewarm failed", exc_info=True)
+
+    t = _threading.Thread(target=_warm, daemon=True, name="picker-cache-prewarm")
+    t.start()
+    return t
+
+
 def list_authenticated_providers(
     current_provider: str = "",
     current_base_url: str = "",
diff --git a/tests/hermes_cli/test_picker_prewarm.py b/tests/hermes_cli/test_picker_prewarm.py
new file mode 100644
index 00000000000..3ddc873f70e
--- /dev/null
+++ b/tests/hermes_cli/test_picker_prewarm.py
@@ -0,0 +1,60 @@
+"""Tests for the /model picker background cache prewarm.
+
+``prewarm_picker_cache_async()`` warms the provider-models disk cache off the
+user's critical path so the first ``/model`` open in a session is fast instead
+of blocking ~1-2s on serial /v1/models fetches. These pin the contracts that
+matter: it runs the warm path at most once per process (no thread leak), it
+delegates the warming to ``list_authenticated_providers``, and swallows errors.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import hermes_cli.model_switch as ms
+
+
+def _reset_guard() -> None:
+    ms._picker_prewarm_done.clear()  # re-arm the once-per-process prewarm guard
+
+
+def test_prewarm_runs_list_authenticated_providers_once():
+    """The first prewarm call starts a worker thread, and that worker invokes
+    ``list_authenticated_providers`` exactly once to do the actual warming."""
+    _reset_guard()
+    with patch.object(ms, "list_authenticated_providers", return_value=[]) as listing:
+        worker = ms.prewarm_picker_cache_async()
+        assert worker is not None, "first call must spawn a prewarm thread"
+        worker.join(timeout=10)
+        assert not worker.is_alive(), "prewarm thread should finish promptly"
+        listing.assert_called_once()
+    _reset_guard()
+
+
+def test_prewarm_guard_is_once_per_process():
+    """After one prewarm has been started, further calls must be no-ops that
+    return None, so repeated triggers cannot pile up extra OS threads."""
+    _reset_guard()
+    with patch.object(ms, "list_authenticated_providers", return_value=[]):
+        first = ms.prewarm_picker_cache_async()
+        assert first is not None
+        first.join(timeout=10)
+        # The guard is now set: every later call must decline to spawn.
+        for _ in range(2):
+            assert ms.prewarm_picker_cache_async() is None
+    _reset_guard()
+
+
+def test_prewarm_never_raises_on_failure():
+    """Provider errors must be swallowed in the worker, never reach excepthook."""
+    _reset_guard()
+    escaped = []  # join() hides worker crashes; unhandled ones are captured here
+    with patch("threading.excepthook", escaped.append), patch.object(
+        ms, "list_authenticated_providers", side_effect=RuntimeError("boom")
+    ):
+        t = ms.prewarm_picker_cache_async()
+        assert t is not None
+        t.join(timeout=10)
+        assert not t.is_alive()
+        assert not escaped, "worker let an exception escape"
+    _reset_guard()