diff --git a/cli.py b/cli.py
index 0e97c480d57..8910e2d8c5d 100644
--- a/cli.py
+++ b/cli.py
@@ -13094,6 +13094,16 @@ class HermesCLI:
         _welcome_color = "#FFF8DC"
         self._console_print(f"[{_welcome_color}]{_welcome_text}[/]")
 
+        # Warm the /model picker's provider-models cache off-thread during this
+        # idle window (banner shown, user about to type). The no-args picker
+        # otherwise blocks ~1-2s on serial /v1/models fetches the first time
+        # it's opened in a session. Fire-and-forget, guarded once-per-process.
+        try:
+            from hermes_cli.model_switch import prewarm_picker_cache_async
+            prewarm_picker_cache_async()
+        except Exception:
+            pass
+
         # Redaction opt-out warning (#17691): ON by default, loud when off.
         # The redactor snapshots its state at import time so any toggle now
         # won't affect the running process — we just want the operator to
diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py
index 0d1f6fa44d6..c4e76b0d5bc 100644
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@@ -1117,6 +1117,68 @@ def switch_model(
 # Authenticated providers listing (for /model no-args display)
 # ---------------------------------------------------------------------------
 
+# Process-level guard so the picker prewarm thread is spawned at most once per
+# process — mirrors run_agent's _openrouter_prewarm_done. Without a guard a
+# long-lived process (or repeated triggers) would leak one OS thread per call.
+import threading as _threading  # noqa: E402
+
+_picker_prewarm_done = _threading.Event()
+# Serializes the check-and-set on the Event above; is_set() followed by set()
+# is not atomic, so concurrent callers could otherwise each spawn a thread.
+_picker_prewarm_lock = _threading.Lock()
+
+
+def prewarm_picker_cache_async() -> Optional["_threading.Thread"]:
+    """Warm the provider-models disk cache in a background daemon thread.
+
+    The no-args ``/model`` picker calls ``list_authenticated_providers()``,
+    which fetches each authenticated provider's live ``/v1/models`` list on a
+    cold/stale cache. Those fetches are independent HTTP round-trips but run
+    serially, so the first ``/model`` open in a session (or any open after the
+    1h cache TTL expires) blocks ~1-2s on the user's critical path.
+
+    This pre-warms that exact path off-thread during idle session time: it
+    runs ``list_authenticated_providers()`` once, which populates
+    ``provider_models_cache.json`` for every authed provider. By the time the
+    user types ``/model``, the picker hits the warm disk cache and renders in
+    ~100ms.
+
+    Fire-and-forget. A process-level Event guard, checked and set under a
+    lock, ensures the warm thread is spawned at most once even when callers
+    race. Fully exception-isolated — a slow or offline provider can never
+    affect the session. Returns the spawned thread (for tests) or None if
+    already warmed.
+    """
+    with _picker_prewarm_lock:
+        if _picker_prewarm_done.is_set():
+            return None
+        _picker_prewarm_done.set()
+
+    def _warm() -> None:
+        try:
+            from hermes_cli.inventory import load_picker_context
+
+            ctx = load_picker_context()
+            # Calling this is what populates cached_provider_model_ids() ->
+            # provider_models_cache.json for each authed provider. We discard
+            # the result; the side effect (warm disk cache) is the point.
+            list_authenticated_providers(
+                current_provider=ctx.current_provider,
+                current_base_url=ctx.current_base_url,
+                current_model=ctx.current_model,
+                user_providers=ctx.user_providers,
+                custom_providers=ctx.custom_providers,
+                max_models=50,
+            )
+        except Exception:
+            # Best-effort warmup — never surface errors into the session.
+            logger.debug("picker cache prewarm failed", exc_info=True)
+
+    t = _threading.Thread(target=_warm, daemon=True, name="picker-cache-prewarm")
+    t.start()
+    return t
+
+
 def list_authenticated_providers(
     current_provider: str = "",
     current_base_url: str = "",
diff --git a/tests/hermes_cli/test_picker_prewarm.py b/tests/hermes_cli/test_picker_prewarm.py
new file mode 100644
index 00000000000..3ddc873f70e
--- /dev/null
+++ b/tests/hermes_cli/test_picker_prewarm.py
@@ -0,0 +1,60 @@
+"""Tests for the /model picker background cache prewarm.
+
+``prewarm_picker_cache_async()`` warms the provider-models disk cache off the
+user's critical path so the first ``/model`` open in a session is fast instead
+of blocking ~1-2s on serial /v1/models fetches. These pin the two contracts
+that matter: it runs the warm path exactly once per process (no thread leak),
+and it delegates to ``list_authenticated_providers`` to do the warming.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import hermes_cli.model_switch as ms
+
+
+def _reset_guard():
+    ms._picker_prewarm_done.clear()
+
+
+def test_prewarm_runs_list_authenticated_providers_once():
+    """First call spawns a thread that calls list_authenticated_providers;
+    the warm side effect is delegated there (which disk-caches per provider)."""
+    _reset_guard()
+    with patch.object(ms, "list_authenticated_providers", return_value=[]) as mock_list:
+        t = ms.prewarm_picker_cache_async()
+        assert t is not None, "first call must spawn a prewarm thread"
+        t.join(timeout=10)
+        assert not t.is_alive(), "prewarm thread should finish promptly"
+        mock_list.assert_called_once()
+    _reset_guard()
+
+
+def test_prewarm_guard_is_once_per_process():
+    """The process-level Event guard must make repeat calls no-ops so a
+    long-lived process never leaks one OS thread per call."""
+    _reset_guard()
+    with patch.object(ms, "list_authenticated_providers", return_value=[]):
+        t1 = ms.prewarm_picker_cache_async()
+        assert t1 is not None
+        t1.join(timeout=10)
+        # Subsequent calls return None (guard set) — no new thread.
+        assert ms.prewarm_picker_cache_async() is None
+        assert ms.prewarm_picker_cache_async() is None
+    _reset_guard()
+
+
+def test_prewarm_never_raises_on_failure():
+    """A failing/offline provider path must be fully swallowed — the prewarm
+    is best-effort and must never surface errors into the session."""
+    _reset_guard()
+    with patch.object(
+        ms, "list_authenticated_providers", side_effect=RuntimeError("boom")
+    ):
+        t = ms.prewarm_picker_cache_async()
+        assert t is not None
+        # join must not raise; the worker swallows the exception internally.
+        t.join(timeout=10)
+        assert not t.is_alive()
+    _reset_guard()