diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 74ea8182533..3049bb45f99 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -144,6 +144,43 @@ def _start_desktop_cron_ticker(stop_event: "threading.Event", interval: int = 60
     provider.start(stop_event, interval=interval)
 
 
+def _warm_gateway_module() -> None:
+    """Import ``hermes_cli.gateway`` once so later imports hit ``sys.modules``.
+
+    Best-effort: meant to be run in a worker thread at startup, so an
+    import failure is logged at debug level instead of being raised.
+    """
+    try:
+        import hermes_cli.gateway  # noqa: F401
+    except Exception:
+        # Warm-up is only an optimisation, so failures are swallowed; log
+        # at debug level to keep a broken import diagnosable.
+        import logging
+
+        logging.getLogger(__name__).debug(
+            "hermes_cli.gateway warm-up import failed", exc_info=True
+        )
+
+
+def _resolve_restart_drain_timeout() -> float:
+    """Return the gateway restart drain timeout in seconds.
+
+    Reuses the single resolver in ``hermes_cli.gateway``
+    (HERMES_RESTART_DRAIN_TIMEOUT env → config agent.restart_drain_timeout
+    → default) rather than re-deriving the precedence chain here.
+    """
+    try:
+        from hermes_cli.gateway import _get_restart_drain_timeout
+
+        return _get_restart_drain_timeout()
+    except ImportError:
+        # Resolver moved/renamed — fall back to the real default so the
+        # value stays a numeric poll-deadline hint, never None.
+        from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
+
+        return DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
+
+
 @asynccontextmanager
 async def _lifespan(app: "FastAPI"):
     app.state.event_channels = {}  # dict[str, set]
@@ -154,6 +191,14 @@ async def _lifespan(app: "FastAPI"):
     # event loop during lifespan startup — see _get_event_state's docstring.
     app.state.chat_argv_lock = asyncio.Lock()
 
+    # Fire hermes_cli.gateway import into a background thread so the event
+    # loop is not blocked and HERMES_DASHBOARD_READY fires without delay.
+    # On a cold Windows install the module chain triggers .pyc compilation
+    # and Defender real-time scans that can stall the event loop for 15-30s.
+    # Running in an executor means the cost is paid in a worker thread while
+    # the server socket is already open and accepting probes.
+    asyncio.get_running_loop().run_in_executor(None, _warm_gateway_module)
+
     # Desktop-spawned backends (HERMES_DESKTOP=1) fire cron jobs themselves,
     # since the app has no gateway running the scheduler. Server `hermes
     # dashboard` is unaffected — it relies on its own gateway.
@@ -1855,19 +1900,15 @@ async def get_status(profile: Optional[str] = None):
         gateway_state=gateway_state,
     )
     # Resolved drain timeout (seconds) so NAS can size its poll deadline
-    # without out-of-band knowledge. Reuse the single resolver
-    # (HERMES_RESTART_DRAIN_TIMEOUT env → config agent.restart_drain_timeout
-    # → default) rather than re-deriving the precedence chain here.
-    try:
-        from hermes_cli.gateway import _get_restart_drain_timeout
-
-        restart_drain_timeout = _get_restart_drain_timeout()
-    except ImportError:
-        # Resolver moved/renamed — fall back to the real default so the
-        # field stays a numeric poll-deadline hint, never None.
-        from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
-
-        restart_drain_timeout = DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
+    # without out-of-band knowledge. Offload to a thread: on a cold
+    # Windows install the first import of hermes_cli.gateway blocks the
+    # asyncio event loop for 15-30s (.pyc compilation + Defender scans),
+    # exceeding the desktop handshake's 15s socket timeout. After the
+    # first call the module is in sys.modules and run_in_executor returns
+    # in microseconds.
+    restart_drain_timeout = await asyncio.get_running_loop().run_in_executor(
+        None, _resolve_restart_drain_timeout
+    )
 
     # Dashboard auth gate (Phase 7): surface whether the gate is engaged
     # and which providers are registered so ``hermes status`` and the