diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 300a467de83..0a14fb0ef23 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -13074,4 +13074,45 @@ def start_server( if server.started: await server.shutdown() - asyncio.run(_serve()) + # On POSIX, keep the long-standing ``asyncio.run(_serve())`` behavior + # unchanged — Python's default loop there is already a SelectorEventLoop + # (or uvloop when uvicorn[standard] installs it), which is exactly what + # uvicorn serves on. Touching that path would only widen the blast radius + # for no benefit. + # + # On Windows it is broken: ``asyncio.run`` defaults to a ProactorEventLoop, + # but uvicorn's socket-serving stack assumes a SelectorEventLoop on win32 + # (``uvicorn/loops/asyncio.py`` forces it, and ``uvicorn.Server.run`` threads + # ``config.get_loop_factory()`` into its runner for exactly this reason). + # Driving uvicorn on the proactor loop makes ``server.startup()`` bind a + # socket that never accepts — the dashboard / desktop backend prints + # "Skipping web UI build" and then hangs forever with the port LISTENING but + # no TCP handshake completing (#50641). So *only on Windows* we mirror + # uvicorn's own machinery and run on the loop factory it picks. + if sys.platform != "win32": + asyncio.run(_serve()) + return + + # Windows-only path. Resolve the runner + loop factory FIRST (and fall back + # to a hand-installed Windows selector policy only when uvicorn predates the + # loop-factory API, < 0.36). The actual serve call is then OUTSIDE the + # try/except so genuine serve-time errors (port in use, KeyboardInterrupt) + # propagate normally instead of being swallowed and double-run. + try: + from uvicorn._compat import asyncio_run as _runner + + _loop_factory = config.get_loop_factory() + except Exception: + _runner = None + _loop_factory = None + try: + asyncio.set_event_loop_policy( + asyncio.WindowsSelectorEventLoopPolicy() # type: ignore[attr-defined] + ) + except Exception: + pass + + if _runner is not None: + _runner(_serve(), loop_factory=_loop_factory) + else: + asyncio.run(_serve()) diff --git a/tests/test_web_server.py b/tests/test_web_server.py index 983ee510ea2..b06677f382f 100644 --- a/tests/test_web_server.py +++ b/tests/test_web_server.py @@ -21,6 +21,7 @@ def _stub_uvicorn(monkeypatch): loaded = True host = "127.0.0.1" port = 8000 + _loop_factory = None def __init__(self, *args, **kwargs): captured.update(kwargs) @@ -28,6 +29,9 @@ def _stub_uvicorn(monkeypatch): def load(self): pass + def get_loop_factory(self): + return self._loop_factory + class lifespan_class: should_exit = False state: dict = {} @@ -75,3 +79,99 @@ def test_start_server_enables_ws_ping_for_half_open_detection(monkeypatch): assert captured["ws_ping_interval"] == 20.0 assert captured["ws_ping_timeout"] == 20.0 + + +def test_start_server_runs_on_uvicorns_loop_factory(monkeypatch): + """The dashboard/desktop backend must serve uvicorn on the loop *uvicorn* + selects, not the interpreter default. + + On Windows ``asyncio.run`` defaults to a ProactorEventLoop, but uvicorn's + socket-serving stack forces a SelectorEventLoop on win32 + (``uvicorn/loops/asyncio.py``). Serving on the proactor loop binds a socket + that never accepts — the backend prints "Skipping web UI build" and hangs + forever with the port LISTENING but no TCP handshake (#50641). We fix that + by routing the serve call through ``uvicorn._compat.asyncio_run`` with + ``config.get_loop_factory()`` — exactly what ``uvicorn.Server.run`` does. + + This asserts the behavioral contract: on Windows the loop factory the runner + receives is the one uvicorn's own Config produced, and bare ``asyncio.run`` + is never the serve path when the loop-factory runner exists. + """ + _stub_uvicorn(monkeypatch) + + # The fix only changes behavior on win32; simulate it so the Windows branch + # is actually exercised on a POSIX CI host. + monkeypatch.setattr(web_server.sys, "platform", "win32") + + # The fake Config (installed by _stub_uvicorn) returns its ``_loop_factory`` + # from get_loop_factory(). Pin a sentinel so we can assert it is threaded + # through to the runner unchanged. + sentinel_factory = object() + monkeypatch.setattr(uvicorn.Config, "_loop_factory", sentinel_factory, raising=False) + + seen: dict = {} + + def _fake_runner(coro, *, loop_factory=None): + seen["loop_factory"] = loop_factory + coro.close() # drain without an event loop + + monkeypatch.setattr("uvicorn._compat.asyncio_run", _fake_runner, raising=False) + + # Bare asyncio.run must NOT be the serve path on Windows when the + # loop-factory runner is importable. + called_bare = {"hit": False} + + def _guard_asyncio_run(coro): + called_bare["hit"] = True + coro.close() + return None + + monkeypatch.setattr(asyncio, "run", _guard_asyncio_run) + + web_server.start_server(host="127.0.0.1", port=0, open_browser=False) + + assert seen.get("loop_factory") is sentinel_factory, ( + "start_server must pass uvicorn's get_loop_factory() result to the " + "runner so Windows serves on a SelectorEventLoop" + ) + assert called_bare["hit"] is False, ( + "start_server must not fall back to bare asyncio.run when uvicorn's " + "loop-factory runner is available" + ) + + +def test_start_server_keeps_bare_asyncio_run_on_posix(monkeypatch): + """POSIX behavior must be byte-for-byte unchanged: serve via the plain + ``asyncio.run(_serve())`` path, never the Windows loop-factory branch. + + The #50641 fix is intentionally win32-scoped to keep the blast radius + minimal — Python's default loop on POSIX is already a SelectorEventLoop + (or uvloop), which is what uvicorn serves on, so there is nothing to fix. + """ + _stub_uvicorn(monkeypatch) + monkeypatch.setattr(web_server.sys, "platform", "linux") + + # If the Windows branch were taken, the loop-factory runner would fire. + runner_called = {"hit": False} + + def _fake_runner(coro, *, loop_factory=None): + runner_called["hit"] = True + coro.close() + + monkeypatch.setattr("uvicorn._compat.asyncio_run", _fake_runner, raising=False) + + bare_called = {"hit": False} + + def _fake_asyncio_run(coro): + bare_called["hit"] = True + coro.close() + return None + + monkeypatch.setattr(asyncio, "run", _fake_asyncio_run) + + web_server.start_server(host="127.0.0.1", port=0, open_browser=False) + + assert bare_called["hit"] is True, "POSIX must serve via bare asyncio.run" + assert runner_called["hit"] is False, ( + "POSIX must not take the Windows loop-factory branch" + )