fix(dashboard): serve uvicorn on SelectorEventLoop on Windows (#50641) (#51717)

On Windows, start_server() served uvicorn via a bare asyncio.run(_serve()),
which uses the default ProactorEventLoop. uvicorn's socket-serving stack
assumes a SelectorEventLoop on win32 (uvicorn/loops/asyncio.py forces it, and
uvicorn.Server.run threads config.get_loop_factory() into its runner for
exactly this reason). Driving uvicorn on the proactor loop makes
server.startup() bind a socket that never accepts: the dashboard and desktop
backend print "Skipping web UI build" then hang forever with the port
LISTENING but no TCP handshake completing.

Fix is win32-scoped to keep the blast radius minimal: POSIX keeps the exact
asyncio.run(_serve()) it had (its default loop is already a SelectorEventLoop /
uvloop, which is what uvicorn serves on). Only on Windows do we mirror
uvicorn.Server.run and run on the loop factory uvicorn picks, with a fallback
to WindowsSelectorEventLoopPolicy for uvicorn < 0.36.

Fixes hermes dashboard and hermes desktop (the Electron app spawns a
hermes dashboard backend). The gateway symptom in the report has a separate
root cause (no uvicorn) and is not addressed here.
This commit is contained in:
Teknium 2026-06-23 23:43:24 -07:00 committed by GitHub
parent 901165b5a4
commit 050bd01b7b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 142 additions and 1 deletions

View file

@ -13074,4 +13074,45 @@ def start_server(
if server.started:
await server.shutdown()
asyncio.run(_serve())
# On POSIX, keep the long-standing ``asyncio.run(_serve())`` behavior
# unchanged — Python's default loop there is already a SelectorEventLoop
# (or uvloop when uvicorn[standard] installs it), which is exactly what
# uvicorn serves on. Touching that path would only widen the blast radius
# for no benefit.
#
# On Windows it is broken: ``asyncio.run`` defaults to a ProactorEventLoop,
# but uvicorn's socket-serving stack assumes a SelectorEventLoop on win32
# (``uvicorn/loops/asyncio.py`` forces it, and ``uvicorn.Server.run`` threads
# ``config.get_loop_factory()`` into its runner for exactly this reason).
# Driving uvicorn on the proactor loop makes ``server.startup()`` bind a
# socket that never accepts — the dashboard / desktop backend prints
# "Skipping web UI build" and then hangs forever with the port LISTENING but
# no TCP handshake completing (#50641). So *only on Windows* we mirror
# uvicorn's own machinery and run on the loop factory it picks.
if sys.platform != "win32":
asyncio.run(_serve())
return
# Windows-only path. Resolve the runner + loop factory FIRST (and fall back
# to a hand-installed Windows selector policy only when uvicorn predates the
# loop-factory API, < 0.36). The actual serve call is then OUTSIDE the
# try/except so genuine serve-time errors (port in use, KeyboardInterrupt)
# propagate normally instead of being swallowed and double-run.
try:
from uvicorn._compat import asyncio_run as _runner
_loop_factory = config.get_loop_factory()
except Exception:
_runner = None
_loop_factory = None
try:
asyncio.set_event_loop_policy(
asyncio.WindowsSelectorEventLoopPolicy() # type: ignore[attr-defined]
)
except Exception:
pass
if _runner is not None:
_runner(_serve(), loop_factory=_loop_factory)
else:
asyncio.run(_serve())

View file

@ -21,6 +21,7 @@ def _stub_uvicorn(monkeypatch):
loaded = True
host = "127.0.0.1"
port = 8000
_loop_factory = None
def __init__(self, *args, **kwargs):
captured.update(kwargs)
@ -28,6 +29,9 @@ def _stub_uvicorn(monkeypatch):
def load(self):
pass
def get_loop_factory(self):
return self._loop_factory
class lifespan_class:
should_exit = False
state: dict = {}
@ -75,3 +79,99 @@ def test_start_server_enables_ws_ping_for_half_open_detection(monkeypatch):
assert captured["ws_ping_interval"] == 20.0
assert captured["ws_ping_timeout"] == 20.0
def test_start_server_runs_on_uvicorns_loop_factory(monkeypatch):
"""The dashboard/desktop backend must serve uvicorn on the loop *uvicorn*
selects, not the interpreter default.
On Windows ``asyncio.run`` defaults to a ProactorEventLoop, but uvicorn's
socket-serving stack forces a SelectorEventLoop on win32
(``uvicorn/loops/asyncio.py``). Serving on the proactor loop binds a socket
that never accepts the backend prints "Skipping web UI build" and hangs
forever with the port LISTENING but no TCP handshake (#50641). We fix that
by routing the serve call through ``uvicorn._compat.asyncio_run`` with
``config.get_loop_factory()`` exactly what ``uvicorn.Server.run`` does.
This asserts the behavioral contract: on Windows the loop factory the runner
receives is the one uvicorn's own Config produced, and bare ``asyncio.run``
is never the serve path when the loop-factory runner exists.
"""
_stub_uvicorn(monkeypatch)
# The fix only changes behavior on win32; simulate it so the Windows branch
# is actually exercised on a POSIX CI host.
monkeypatch.setattr(web_server.sys, "platform", "win32")
# The fake Config (installed by _stub_uvicorn) returns its ``_loop_factory``
# from get_loop_factory(). Pin a sentinel so we can assert it is threaded
# through to the runner unchanged.
sentinel_factory = object()
monkeypatch.setattr(uvicorn.Config, "_loop_factory", sentinel_factory, raising=False)
seen: dict = {}
def _fake_runner(coro, *, loop_factory=None):
seen["loop_factory"] = loop_factory
coro.close() # drain without an event loop
monkeypatch.setattr("uvicorn._compat.asyncio_run", _fake_runner, raising=False)
# Bare asyncio.run must NOT be the serve path on Windows when the
# loop-factory runner is importable.
called_bare = {"hit": False}
def _guard_asyncio_run(coro):
called_bare["hit"] = True
coro.close()
return None
monkeypatch.setattr(asyncio, "run", _guard_asyncio_run)
web_server.start_server(host="127.0.0.1", port=0, open_browser=False)
assert seen.get("loop_factory") is sentinel_factory, (
"start_server must pass uvicorn's get_loop_factory() result to the "
"runner so Windows serves on a SelectorEventLoop"
)
assert called_bare["hit"] is False, (
"start_server must not fall back to bare asyncio.run when uvicorn's "
"loop-factory runner is available"
)
def test_start_server_keeps_bare_asyncio_run_on_posix(monkeypatch):
"""POSIX behavior must be byte-for-byte unchanged: serve via the plain
``asyncio.run(_serve())`` path, never the Windows loop-factory branch.
The #50641 fix is intentionally win32-scoped to keep the blast radius
minimal Python's default loop on POSIX is already a SelectorEventLoop
(or uvloop), which is what uvicorn serves on, so there is nothing to fix.
"""
_stub_uvicorn(monkeypatch)
monkeypatch.setattr(web_server.sys, "platform", "linux")
# If the Windows branch were taken, the loop-factory runner would fire.
runner_called = {"hit": False}
def _fake_runner(coro, *, loop_factory=None):
runner_called["hit"] = True
coro.close()
monkeypatch.setattr("uvicorn._compat.asyncio_run", _fake_runner, raising=False)
bare_called = {"hit": False}
def _fake_asyncio_run(coro):
bare_called["hit"] = True
coro.close()
return None
monkeypatch.setattr(asyncio, "run", _fake_asyncio_run)
web_server.start_server(host="127.0.0.1", port=0, open_browser=False)
assert bare_called["hit"] is True, "POSIX must serve via bare asyncio.run"
assert runner_called["hit"] is False, (
"POSIX must not take the Windows loop-factory branch"
)