hermes-agent/tests/test_web_server.py
Teknium 050bd01b7b
fix(dashboard): serve uvicorn on SelectorEventLoop on Windows (#50641) (#51717)
On Windows, start_server() served uvicorn via a bare asyncio.run(_serve()),
which uses the default ProactorEventLoop. uvicorn's socket-serving stack
assumes a SelectorEventLoop on win32 (uvicorn/loops/asyncio.py forces it, and
uvicorn.Server.run threads config.get_loop_factory() into its runner for
exactly this reason). Driving uvicorn on the proactor loop makes
server.startup() bind a socket that never accepts: the dashboard and desktop
backend print "Skipping web UI build" then hang forever with the port
LISTENING but no TCP handshake completing.

Fix is win32-scoped to keep the blast radius minimal: POSIX keeps the exact
asyncio.run(_serve()) it had (its default loop is already a SelectorEventLoop /
uvloop, which is what uvicorn serves on). Only on Windows do we mirror
uvicorn.Server.run and run on the loop factory uvicorn picks, with a fallback
to WindowsSelectorEventLoopPolicy for uvicorn < 0.36.

Fixes hermes dashboard and hermes desktop (the Electron app spawns a
hermes dashboard backend). The gateway symptom in the report has a separate
root cause (no uvicorn) and is not addressed here.
2026-06-23 23:43:24 -07:00

177 lines
5.9 KiB
Python

"""Test that start_server configures ws-ping keepalive.
The server now uses uvicorn.Server directly (not uvicorn.run) so we stub
Config + Server + asyncio.run to capture kwargs without starting an event loop.
"""
import asyncio
import contextlib
import uvicorn
from hermes_cli import web_server
def _stub_uvicorn(monkeypatch):
"""Replace uvicorn.Config/Server with fakes so start_server returns
immediately. Returns a dict with captured Config kwargs."""
captured: dict = {}
class _FakeConfig:
loaded = True
host = "127.0.0.1"
port = 8000
_loop_factory = None
def __init__(self, *args, **kwargs):
captured.update(kwargs)
def load(self):
pass
def get_loop_factory(self):
return self._loop_factory
class lifespan_class:
should_exit = False
state: dict = {}
def __init__(self, *a, **kw):
pass
async def startup(self):
pass
async def shutdown(self):
pass
class _FakeServer:
should_exit = False
started = True
servers: list = []
lifespan = None
@staticmethod
def capture_signals():
return contextlib.nullcontext()
async def startup(self, sockets=None):
pass
async def main_loop(self):
pass
async def shutdown(self, sockets=None):
pass
monkeypatch.setattr(uvicorn, "Config", _FakeConfig)
monkeypatch.setattr(uvicorn, "Server", lambda config: _FakeServer())
return captured
def test_start_server_enables_ws_ping_for_half_open_detection(monkeypatch):
"""WS ping must be configured so half-open connections (reverse-proxy 524,
dropped tunnels) raise WebSocketDisconnect into the reaping path (#32377)."""
captured = _stub_uvicorn(monkeypatch)
# Loopback bind => no auth gate, so this reaches the Config constructor.
web_server.start_server(host="127.0.0.1", port=0, open_browser=False)
assert captured["ws_ping_interval"] == 20.0
assert captured["ws_ping_timeout"] == 20.0
def test_start_server_runs_on_uvicorns_loop_factory(monkeypatch):
"""The dashboard/desktop backend must serve uvicorn on the loop *uvicorn*
selects, not the interpreter default.
On Windows ``asyncio.run`` defaults to a ProactorEventLoop, but uvicorn's
socket-serving stack forces a SelectorEventLoop on win32
(``uvicorn/loops/asyncio.py``). Serving on the proactor loop binds a socket
that never accepts — the backend prints "Skipping web UI build" and hangs
forever with the port LISTENING but no TCP handshake (#50641). We fix that
by routing the serve call through ``uvicorn._compat.asyncio_run`` with
``config.get_loop_factory()`` — exactly what ``uvicorn.Server.run`` does.
This asserts the behavioral contract: on Windows the loop factory the runner
receives is the one uvicorn's own Config produced, and bare ``asyncio.run``
is never the serve path when the loop-factory runner exists.
"""
_stub_uvicorn(monkeypatch)
# The fix only changes behavior on win32; simulate it so the Windows branch
# is actually exercised on a POSIX CI host.
monkeypatch.setattr(web_server.sys, "platform", "win32")
# The fake Config (installed by _stub_uvicorn) returns its ``_loop_factory``
# from get_loop_factory(). Pin a sentinel so we can assert it is threaded
# through to the runner unchanged.
sentinel_factory = object()
monkeypatch.setattr(uvicorn.Config, "_loop_factory", sentinel_factory, raising=False)
seen: dict = {}
def _fake_runner(coro, *, loop_factory=None):
seen["loop_factory"] = loop_factory
coro.close() # drain without an event loop
monkeypatch.setattr("uvicorn._compat.asyncio_run", _fake_runner, raising=False)
# Bare asyncio.run must NOT be the serve path on Windows when the
# loop-factory runner is importable.
called_bare = {"hit": False}
def _guard_asyncio_run(coro):
called_bare["hit"] = True
coro.close()
return None
monkeypatch.setattr(asyncio, "run", _guard_asyncio_run)
web_server.start_server(host="127.0.0.1", port=0, open_browser=False)
assert seen.get("loop_factory") is sentinel_factory, (
"start_server must pass uvicorn's get_loop_factory() result to the "
"runner so Windows serves on a SelectorEventLoop"
)
assert called_bare["hit"] is False, (
"start_server must not fall back to bare asyncio.run when uvicorn's "
"loop-factory runner is available"
)
def test_start_server_keeps_bare_asyncio_run_on_posix(monkeypatch):
"""POSIX behavior must be byte-for-byte unchanged: serve via the plain
``asyncio.run(_serve())`` path, never the Windows loop-factory branch.
The #50641 fix is intentionally win32-scoped to keep the blast radius
minimal — Python's default loop on POSIX is already a SelectorEventLoop
(or uvloop), which is what uvicorn serves on, so there is nothing to fix.
"""
_stub_uvicorn(monkeypatch)
monkeypatch.setattr(web_server.sys, "platform", "linux")
# If the Windows branch were taken, the loop-factory runner would fire.
runner_called = {"hit": False}
def _fake_runner(coro, *, loop_factory=None):
runner_called["hit"] = True
coro.close()
monkeypatch.setattr("uvicorn._compat.asyncio_run", _fake_runner, raising=False)
bare_called = {"hit": False}
def _fake_asyncio_run(coro):
bare_called["hit"] = True
coro.close()
return None
monkeypatch.setattr(asyncio, "run", _fake_asyncio_run)
web_server.start_server(host="127.0.0.1", port=0, open_browser=False)
assert bare_called["hit"] is True, "POSIX must serve via bare asyncio.run"
assert runner_called["hit"] is False, (
"POSIX must not take the Windows loop-factory branch"
)