From 1e25358a8f222ffd1711ba8901ebaa32ac263bc7 Mon Sep 17 00:00:00 2001 From: ethernet Date: Thu, 11 Jun 2026 20:55:57 -0400 Subject: [PATCH] refactor(desktop): use port 0 for ephemeral port discovery instead of PortPool reservation Replace the PortPool-based port reservation system (9120-9199 range) with OS-assigned ephemeral ports via --port 0. Before: Desktop probed a hardcoded port range, reserved ports in-process to close TOCTOU races, and passed the chosen port to the dashboard via CLI arg. After: Desktop spawns dashboard with --port 0, parses the actual port from a stdout announcement line (HERMES_DASHBOARD_READY port=), and uses that for WebSocket connections. Changes: - web_server.py: add --port 0 support with SO_REUSEADDR pre-bind + announcement; add EADDRINUSE preflight for explicit ports - main.cjs: remove PortPool, PORT_FLOOR/CEILING, pickPort(), isPortAvailable(); add waitForDashboardPort() stdout parser - Delete port-pool.cjs and port-pool.test.cjs (106 lines removed) Net effect: eliminates the entire TOCTOU-mitigation reservation infrastructure and arbitrary port range constraints. OS handles port allocation natively. --- apps/desktop/electron/backend-ready.cjs | 66 ++++++++ apps/desktop/electron/main.cjs | 73 ++------- apps/desktop/electron/port-pool.cjs | 73 --------- apps/desktop/electron/port-pool.test.cjs | 77 --------- apps/desktop/package.json | 2 +- hermes_cli/subcommands/dashboard.py | 2 +- hermes_cli/web_server.py | 156 +++++++++++++------ tests/hermes_cli/test_dashboard_auth_gate.py | 59 ++++++- tests/test_web_server.py | 67 +++++++- 9 files changed, 305 insertions(+), 270 deletions(-) create mode 100644 apps/desktop/electron/backend-ready.cjs delete mode 100644 apps/desktop/electron/port-pool.cjs delete mode 100644 apps/desktop/electron/port-pool.test.cjs diff --git a/apps/desktop/electron/backend-ready.cjs b/apps/desktop/electron/backend-ready.cjs new file mode 100644 index 00000000000..9af41e549c4 --- /dev/null +++ b/apps/desktop/electron/backend-ready.cjs @@ -0,0 +1,66 @@ +const _READY_RE = /^HERMES_DASHBOARD_READY port=(\d+)/m + +/** + * Watch a child process's stdout for the `HERMES_DASHBOARD_READY port=` + * line that web_server.py prints after uvicorn binds its socket. + * + * Returns the parsed port. Rejects if: + * - the child exits before emitting the line + * - the child emits an `error` event + * - no line arrives within the timeout + * + * A single `cleanup()` tears down every listener (data/exit/error/timeout) + * on every terminal path — resolve, reject, or timeout — so repeated + * backend spawns don't leak listener slots on the child. + */ +function waitForDashboardPort(child, timeoutMs = 45_000) { + return new Promise((resolve, reject) => { + let buf = '' + let done = false + + function cleanup() { + if (done) return + done = true + clearTimeout(timer) + child.stdout.off('data', onData) + child.off('exit', onExit) + child.off('error', onError) + } + + function onData(chunk) { + buf += chunk.toString() + let nl + while ((nl = buf.indexOf('\n')) !== -1) { + const line = buf.slice(0, nl) + buf = buf.slice(nl + 1) + const m = line.match(_READY_RE) + if (m) { + cleanup() + resolve(parseInt(m[1], 10)) + return + } + } + } + + function onExit(code, signal) { + cleanup() + reject(new Error(`Hermes backend: exited before port announcement (${signal || code})`)) + } + + function onError(err) { + cleanup() + reject(err) + } + + const timer = setTimeout(() => { + cleanup() + reject(new Error(`Timed out waiting for Hermes backend port announcement (${timeoutMs}ms)`)) + }, timeoutMs) + + child.stdout.on('data', onData) + child.on('exit', onExit) + child.on('error', onError) + }) +} + +module.exports = { waitForDashboardPort } diff --git a/apps/desktop/electron/main.cjs b/apps/desktop/electron/main.cjs index 923d4127075..85cf762e85f 100644 --- a/apps/desktop/electron/main.cjs +++ b/apps/desktop/electron/main.cjs @@ -35,7 +35,7 @@ const { const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs') const { probeGatewayWebSocket } = require('./gateway-ws-probe.cjs') const { adoptServedDashboardToken } = require('./dashboard-token.cjs') -const { PortPool } = require('./port-pool.cjs') +const { waitForDashboardPort } = require('./backend-ready.cjs') const { serializeJsonBody, setJsonRequestHeaders } = require('./oauth-net-request.cjs') const { fetchMarketplaceThemes, searchMarketplaceThemes } = require('./vscode-marketplace.cjs') const { buildDesktopBackendEnv } = require('./backend-env.cjs') @@ -111,12 +111,6 @@ if (USER_DATA_OVERRIDE) { app.setPath('userData', resolvedUserData) } -const PORT_FLOOR = 9120 -const PORT_CEILING = 9199 -// In-process port reservations that close the pickPort() TOCTOU window where -// two concurrent backend spawns could be handed the same port. See -// port-pool.cjs for the full rationale. -const portPool = new PortPool(PORT_FLOOR, PORT_CEILING) const DEV_SERVER = process.env.HERMES_DESKTOP_DEV_SERVER const IS_PACKAGED = app.isPackaged const IS_MAC = process.platform === 'darwin' @@ -2566,24 +2560,6 @@ async function ensureRuntime(backend) { return backend } -function isPortAvailable(port) { - return new Promise(resolve => { - const server = net.createServer() - server.once('error', () => resolve(false)) - server.once('listening', () => { - server.close(() => resolve(true)) - }) - server.listen(port, '127.0.0.1') - }) -} - -async function pickPort() { - const port = await portPool.reserve(isPortAvailable) - if (port === null) { - throw new Error(`No free localhost port in ${PORT_FLOOR}-${PORT_CEILING}`) - } - return port -} function fetchJson(url, token, options = {}) { return new Promise((resolve, reject) => { @@ -4661,25 +4637,14 @@ async function spawnPoolBackend(profile, entry) { } } - const port = await pickPort() const token = crypto.randomBytes(32).toString('base64url') // --profile wins over the inherited HERMES_HOME env (see _apply_profile_override // step 3 in hermes_cli/main.py), so the child re-homes to this profile. - const dashboardArgs = ['--profile', profile, 'dashboard', '--no-open', '--host', '127.0.0.1', '--port', String(port)] - let backend - let hermesCwd - let webDist - try { - backend = await ensureRuntime(resolveHermesBackend(dashboardArgs)) - hermesCwd = resolveHermesCwd() - webDist = resolveWebDist() - } catch (error) { - // These run before the child exists / its exit handler is attached, so a - // throw here would otherwise leak the reservation and slowly exhaust the - // 9120-9199 range across switch cycles in one app session. - portPool.release(port) - throw error - } + // --port 0: the OS assigns an ephemeral port; the child announces it on stdout. + const dashboardArgs = ['--profile', profile, 'dashboard', '--no-open', '--host', '127.0.0.1', '--port', '0'] + const backend = await ensureRuntime(resolveHermesBackend(dashboardArgs)) + const hermesCwd = resolveHermesCwd() + const webDist = resolveWebDist() rememberLog(`Starting Hermes backend for profile "${profile}" via ${backend.label}`) @@ -4707,7 +4672,6 @@ async function spawnPoolBackend(profile, entry) { }) ) entry.process = child - entry.port = port entry.token = token child.stdout.on('data', rememberLog) @@ -4721,13 +4685,11 @@ async function spawnPoolBackend(profile, entry) { child.once('error', error => { rememberLog(`Hermes backend for profile "${profile}" failed to start: ${error.message}`) backendPool.delete(profile) - portPool.release(port) rejectStart?.(error) }) child.once('exit', (code, signal) => { rememberLog(`Hermes backend for profile "${profile}" exited (${signal || code})`) backendPool.delete(profile) - portPool.release(port) if (!ready) { rejectStart?.( new Error(`Hermes backend for profile "${profile}" exited before it became ready (${signal || code}).`) @@ -4735,6 +4697,10 @@ async function spawnPoolBackend(profile, entry) { } }) + // Discover the ephemeral port the child bound to + const port = await Promise.race([waitForDashboardPort(child), startFailed]) + entry.port = port + const baseUrl = `http://127.0.0.1:${port}` await Promise.race([waitForHermes(baseUrl, token), startFailed]) ready = true @@ -4762,7 +4728,6 @@ function stopPoolBackend(profile) { const entry = backendPool.get(profile) if (!entry) return backendPool.delete(profile) - if (entry.port) portPool.release(entry.port) if (entry.process && !entry.process.killed) { try { entry.process.kill('SIGTERM') @@ -4848,11 +4813,6 @@ async function startHermes() { } if (connectionPromise) return connectionPromise - // Hoisted so the outer .catch can release a port reserved by pickPort() when - // a throw (e.g. ensureRuntime failing) happens before the child's exit - // handler is attached. Stays null on the remote path (no port picked). - let reservedPort = null - connectionPromise = (async () => { await advanceBootProgress('backend.resolve', 'Resolving Hermes backend', 8) // Resolve for the desktop's primary profile so a per-profile remote @@ -4880,11 +4840,9 @@ async function startHermes() { } } - await advanceBootProgress('backend.port', 'Finding an open local port', 16) - const port = await pickPort() - reservedPort = port const token = crypto.randomBytes(32).toString('base64url') - const dashboardArgs = ['dashboard', '--no-open', '--host', '127.0.0.1', '--port', String(port)] + // --port 0: the OS assigns an ephemeral port; the child announces it on stdout. + const dashboardArgs = ['dashboard', '--no-open', '--host', '127.0.0.1', '--port', '0'] // Pin the desktop's chosen profile via the global --profile flag. This is // deterministic (it wins over the sticky ~/.hermes/active_profile file) and // resolves HERMES_HOME the same way `hermes -p ` does on the CLI. An @@ -4951,7 +4909,6 @@ async function startHermes() { ) hermesProcess = null connectionPromise = null - portPool.release(port) sendBackendExit({ code: null, signal: null, error: error.message }) rejectBackendStart?.(error) }) @@ -4959,7 +4916,6 @@ async function startHermes() { rememberLog(`Hermes backend exited (${signal || code})`) hermesProcess = null connectionPromise = null - portPool.release(port) sendBackendExit({ code, signal }) if (!backendReady) { const message = `Hermes backend exited before it became ready (${signal || code}).` @@ -4980,6 +4936,10 @@ async function startHermes() { } }) + await advanceBootProgress('backend.port', 'Waiting for Hermes backend to launch', 86) + // Discover the ephemeral port the child bound to + const port = await Promise.race([waitForDashboardPort(hermesProcess), backendStartFailed]) + const baseUrl = `http://127.0.0.1:${port}` await advanceBootProgress('backend.wait', 'Waiting for Hermes backend to become ready', 90) await Promise.race([waitForHermes(baseUrl, token), backendStartFailed]) @@ -5019,7 +4979,6 @@ async function startHermes() { { allowDecrease: true } ) connectionPromise = null - portPool.release(reservedPort) throw error }) diff --git a/apps/desktop/electron/port-pool.cjs b/apps/desktop/electron/port-pool.cjs deleted file mode 100644 index 35131090814..00000000000 --- a/apps/desktop/electron/port-pool.cjs +++ /dev/null @@ -1,73 +0,0 @@ -'use strict' - -/** - * In-process port reservation pool for the desktop backend launcher. - * - * pickPort() probes a localhost port with a throwaway server and closes it - * before the real bind happens in a separate Python child. Between that probe - * and the child's bind there is a TOCTOU window: a second concurrent spawn - * (the primary backend racing a pool backend) can be handed the SAME port, and - * one then dies with EADDRINUSE ("address already in use" -> "Object has been - * destroyed" boot loop). Reserving the chosen port in THIS process until the - * child exits closes that window. - * - * The OS bind remains the source of truth; this only deconflicts racers inside - * this process — it can't stop a foreign squatter, which the probe + the - * EADDRINUSE self-heal still cover. - * - * The pool is dependency-injected (the availability probe is passed in) and - * free of Electron/Node socket I/O, so it is unit-tested without real sockets - * (see port-pool.test.cjs). - */ -class PortPool { - /** - * @param {number} floor inclusive lowest port to hand out - * @param {number} ceiling inclusive highest port to hand out - */ - constructor(floor, ceiling) { - this.floor = floor - this.ceiling = ceiling - this._reserved = new Set() - } - - /** @returns {boolean} whether `port` is currently reserved in-process. */ - has(port) { - return this._reserved.has(port) - } - - /** Release a previously reserved port. No-op if it was not reserved. */ - release(port) { - this._reserved.delete(port) - } - - /** Drop all reservations. */ - clear() { - this._reserved.clear() - } - - /** @returns {number} count of currently reserved ports. */ - get size() { - return this._reserved.size - } - - /** - * Reserve and return the lowest port in [floor, ceiling] that is neither - * already reserved in-process nor rejected by `isAvailable(port)`, or null - * if every port is taken. `isAvailable` may be sync (boolean) or async - * (Promise); it is awaited either way. - * - * @param {(port: number) => boolean | Promise} isAvailable - * @returns {Promise} - */ - async reserve(isAvailable) { - for (let port = this.floor; port <= this.ceiling; port += 1) { - if (this._reserved.has(port)) continue - if (!(await isAvailable(port))) continue - this._reserved.add(port) - return port - } - return null - } -} - -module.exports = { PortPool } diff --git a/apps/desktop/electron/port-pool.test.cjs b/apps/desktop/electron/port-pool.test.cjs deleted file mode 100644 index f2600ce7d5f..00000000000 --- a/apps/desktop/electron/port-pool.test.cjs +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Tests for electron/port-pool.cjs. - * - * Run with: node --test electron/port-pool.test.cjs - * - * PortPool is the in-process reservation that closes the pickPort() TOCTOU - * window. These cover selection order, skipping reserved/unavailable ports, - * release/reuse, exhaustion, and async probes — without real sockets. - */ - -const test = require('node:test') -const assert = require('node:assert/strict') - -const { PortPool } = require('./port-pool.cjs') - -const allFree = () => true - -test('reserve returns the lowest free port and reserves it', async () => { - const pool = new PortPool(9120, 9199) - const port = await pool.reserve(allFree) - assert.equal(port, 9120) - assert.ok(pool.has(9120)) - assert.equal(pool.size, 1) -}) - -test('reserve skips ports already reserved in-process', async () => { - const pool = new PortPool(9120, 9199) - const first = await pool.reserve(allFree) - const second = await pool.reserve(allFree) - assert.equal(first, 9120) - assert.equal(second, 9121) -}) - -test('reserve skips ports the probe rejects', async () => { - const pool = new PortPool(9120, 9199) - const busy = new Set([9120, 9121]) - const port = await pool.reserve(p => !busy.has(p)) - assert.equal(port, 9122) -}) - -test('reserve returns null when every port is taken', async () => { - const pool = new PortPool(9120, 9121) - await pool.reserve(allFree) - await pool.reserve(allFree) - assert.equal(await pool.reserve(allFree), null) -}) - -test('release frees a reserved port for reuse', async () => { - const pool = new PortPool(9120, 9120) - assert.equal(await pool.reserve(allFree), 9120) - assert.equal(await pool.reserve(allFree), null) // exhausted - pool.release(9120) - assert.ok(!pool.has(9120)) - assert.equal(await pool.reserve(allFree), 9120) // reusable -}) - -test('release is a no-op for an unreserved port', () => { - const pool = new PortPool(9120, 9199) - pool.release(9120) - assert.equal(pool.size, 0) -}) - -test('reserve awaits an async probe', async () => { - const pool = new PortPool(9120, 9199) - const busy = new Set([9120]) - const port = await pool.reserve(p => Promise.resolve(!busy.has(p))) - assert.equal(port, 9121) -}) - -test('clear drops all reservations', async () => { - const pool = new PortPool(9120, 9199) - await pool.reserve(allFree) - await pool.reserve(allFree) - assert.equal(pool.size, 2) - pool.clear() - assert.equal(pool.size, 0) -}) diff --git a/apps/desktop/package.json b/apps/desktop/package.json index d78416589f6..08f1cc1aa0f 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -36,7 +36,7 @@ "test:desktop:nsis": "node scripts/test-desktop.mjs nsis", "test:desktop:existing": "node scripts/test-desktop.mjs existing", "test:desktop:fresh": "node scripts/test-desktop.mjs fresh", - "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/port-pool.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs", + "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs", "typecheck": "tsc -p . --noEmit", "lint": "eslint src/ electron/", "lint:fix": "eslint src/ electron/ --fix", diff --git a/hermes_cli/subcommands/dashboard.py b/hermes_cli/subcommands/dashboard.py index 01ee57e2624..380a81c3e3a 100644 --- a/hermes_cli/subcommands/dashboard.py +++ b/hermes_cli/subcommands/dashboard.py @@ -23,7 +23,7 @@ def build_dashboard_parser( description="Launch the Hermes Agent web dashboard for managing config, API keys, and sessions", ) dashboard_parser.add_argument( - "--port", type=int, default=9119, help="Port (default 9119)" + "--port", type=int, default=9119, help="Port (default 9119, 0 for auto-assign by OS)" ) dashboard_parser.add_argument( "--host", default="127.0.0.1", help="Host (default 127.0.0.1)" diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 9e295d5ccea..31e28cc2996 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -11568,6 +11568,62 @@ app.include_router(_dashboard_auth_router) mount_spa(app) +def _read_bound_port(server: "uvicorn.Server", fallback: int) -> int: + """Read the OS-assigned port from a live uvicorn server socket. + + After ``server.startup()`` the socket is bound. Returns the actual + port so ephemeral (port-0) discovery works without a pre-bind TOCTOU. + Falls back to *fallback* if the socket list is empty (shouldn't happen + but guards against uvicorn internals changing). + """ + if server.servers and server.servers[0].sockets: + return server.servers[0].sockets[0].getsockname()[1] + return fallback + + +def _maybe_open_browser( + host: str, actual_port: int, open_browser: bool, initial_profile: str +) -> None: + """Open the dashboard URL in the user's browser if appropriate. + + Skips on headless Linux (no ``DISPLAY`` / ``WAYLAND_DISPLAY``) to avoid + TUI browsers (links, lynx) that would SIGHUP the server process. + Maps ``0.0.0.0`` / ``::`` binds to ``127.0.0.1`` so the browser opens + a reachable URL. + """ + if not open_browser: + return + + import webbrowser + + _has_display = ( + sys.platform != "linux" + or bool(os.environ.get("DISPLAY")) + or bool(os.environ.get("WAYLAND_DISPLAY")) + ) + if not _has_display: + _log.debug( + "Skipping browser-open: no DISPLAY or WAYLAND_DISPLAY detected " + "(headless Linux). Pass --no-open to suppress this detection." + ) + return + + _display_host = host if host not in ("0.0.0.0", "::") else "127.0.0.1" + _open_url = f"http://{_display_host}:{actual_port}" + if initial_profile: + from urllib.parse import quote + _open_url += f"/?profile={quote(initial_profile)}" + + def _open(): + try: + time.sleep(1.0) + webbrowser.open(_open_url) + except Exception: + pass + + threading.Thread(target=_open, daemon=True).start() + + def start_server( host: str = "127.0.0.1", port: int = 9119, @@ -11650,60 +11706,60 @@ def start_server( # Record the bound host so host_header_middleware can validate incoming # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7). - # bound_port is also stashed so /api/pty can build the back-WS URL the - # PTY child uses to publish events to the dashboard sidebar. app.state.bound_host = host - app.state.bound_port = port - if open_browser: - import webbrowser - - # On headless Linux (no DISPLAY or WAYLAND_DISPLAY) some registered - # browsers are TUI programs (links, lynx, www-browser) that try to - # take over the terminal. That can send SIGHUP to the server process - # and cause an immediate exit even though uvicorn bound successfully. - # Skip the auto-open attempt on headless systems and let the user - # open the URL manually. macOS and Windows are always considered - # display-capable. - _has_display = ( - sys.platform != "linux" - or bool(os.environ.get("DISPLAY")) - or bool(os.environ.get("WAYLAND_DISPLAY")) - ) - - if _has_display: - _open_url = f"http://{host}:{port}" - if initial_profile: - from urllib.parse import quote - _open_url += f"/?profile={quote(initial_profile)}" - - def _open(): - try: - time.sleep(1.0) - webbrowser.open(_open_url) - except Exception: - pass - - threading.Thread(target=_open, daemon=True).start() - else: - _log.debug( - "Skipping browser-open: no DISPLAY or WAYLAND_DISPLAY detected " - "(headless Linux). Pass --no-open to suppress this detection." - ) - - print(f" Hermes Web UI → http://{host}:{port}") - # proxy_headers defaults to False so _ws_client_is_allowed sees the real - # connection peer rather than X-Forwarded-For's rewritten value (which - # would defeat the loopback gate when behind a reverse proxy). When the - # OAuth gate is active we are explicitly running behind a TLS terminator - # (Fly.io) and need X-Forwarded-Proto to decide cookie Secure flags, so - # we flip proxy_headers on for that mode. - uvicorn.run( + # ── Start uvicorn with direct Server API ───────────────────────── + # We use uvicorn.Server directly (not uvicorn.run) so we can split + # startup from the main loop. After startup() the socket is actually + # bound — we read the OS-assigned port from the live socket, print + # HERMES_DASHBOARD_READY, open the browser, *then* serve. + # + # This eliminates the TOCTOU of the old pre-bind-then-close approach + # (bind port 0 → close → uvicorn rebind): the socket is held by + # uvicorn the entire time, so no other process can steal the port. + # + # For explicit non-zero ports, if the port is taken uvicorn catches + # OSError inside create_server() and exits with a clear error — no + # separate preflight probe needed. + config = uvicorn.Config( app, host=host, port=port, log_level="warning", + # proxy_headers defaults to False so _ws_client_is_allowed sees + # the real connection peer rather than X-Forwarded-For's rewritten + # value (which would defeat the loopback gate when behind a reverse + # proxy). When the OAuth gate is active we are explicitly running + # behind a TLS terminator (Fly.io) and need X-Forwarded-Proto to + # decide cookie Secure flags, so we flip proxy_headers on for that + # mode. proxy_headers=bool(app.state.auth_required), - # Detect half-open WS connections (reverse-proxy 524, dropped tunnels) - # within ~20-40s so WebSocketDisconnect fires the disconnect→reap path. - # 20s stays under Cloudflare Tunnel's idle timeout, keeping it warm. + # Detect half-open WS connections (reverse-proxy 524, dropped + # tunnels) within ~20-40s so WebSocketDisconnect fires the + # disconnect→reap path. 20s stays under Cloudflare Tunnel's idle + # timeout, keeping it warm. ws_ping_interval=20.0, ws_ping_timeout=20.0, ) + server = uvicorn.Server(config) + + async def _serve(): + # Split startup from main_loop so we can read the bound port + # after the socket is live (ephemeral port discovery). + if not config.loaded: + config.load() + server.lifespan = config.lifespan_class(config) + with server.capture_signals(): + await server.startup() + if server.should_exit: + return + + actual_port = _read_bound_port(server, fallback=port) + app.state.bound_port = actual_port + + print(f"HERMES_DASHBOARD_READY port={actual_port}", flush=True) + print(f" Hermes Web UI → http://{host}:{actual_port}") + _maybe_open_browser(host, actual_port, open_browser, initial_profile) + + await server.main_loop() + if server.started: + await server.shutdown() + + asyncio.run(_serve()) diff --git a/tests/hermes_cli/test_dashboard_auth_gate.py b/tests/hermes_cli/test_dashboard_auth_gate.py index b7e01aa3992..c39356bbb43 100644 --- a/tests/hermes_cli/test_dashboard_auth_gate.py +++ b/tests/hermes_cli/test_dashboard_auth_gate.py @@ -106,17 +106,60 @@ def test_should_require_auth_truth_table(host, allow_public, expected): def _stub_uvicorn_run(monkeypatch): - """Replace uvicorn.run with a no-op recorder so start_server returns - immediately (rather than blocking on the event loop). Returns the dict - that will capture the keyword args.""" + """Replace uvicorn.Config/Server with no-op fakes so start_server + returns immediately (rather than blocking on the event loop). Returns the dict + that will capture the keyword args. + """ + import asyncio + import contextlib import uvicorn - captured: dict = {} + captured: dict = {"kwargs": {}} - def _fake_run(*args, **kwargs): - captured["args"] = args - captured["kwargs"] = kwargs + class _FakeConfig: + loaded = True + host = "127.0.0.1" + port = 8000 - monkeypatch.setattr(uvicorn, "run", _fake_run) + def __init__(self, *args, **kwargs): + captured["kwargs"] = kwargs + + def load(self): + pass + + class lifespan_class: + should_exit = False + state: dict = {} + + def __init__(self, *a, **kw): + pass + + async def startup(self): + pass + + async def shutdown(self): + pass + + class _FakeServer: + should_exit = False + started = True + servers: list = [] + lifespan = None + + @staticmethod + def capture_signals(): + return contextlib.nullcontext() + + async def startup(self, sockets=None): + pass + + async def main_loop(self): + pass + + async def shutdown(self, sockets=None): + pass + + monkeypatch.setattr(uvicorn, "Config", _FakeConfig) + monkeypatch.setattr(uvicorn, "Server", lambda config: _FakeServer()) return captured diff --git a/tests/test_web_server.py b/tests/test_web_server.py index 2f32925963f..983ee510ea2 100644 --- a/tests/test_web_server.py +++ b/tests/test_web_server.py @@ -1,15 +1,76 @@ +"""Test that start_server configures ws-ping keepalive. + +The server now uses uvicorn.Server directly (not uvicorn.run) so we stub +Config + Server + asyncio.run to capture kwargs without starting an event loop. +""" + +import asyncio +import contextlib + import uvicorn from hermes_cli import web_server +def _stub_uvicorn(monkeypatch): + """Replace uvicorn.Config/Server with fakes so start_server returns + immediately. Returns a dict with captured Config kwargs.""" + captured: dict = {} + + class _FakeConfig: + loaded = True + host = "127.0.0.1" + port = 8000 + + def __init__(self, *args, **kwargs): + captured.update(kwargs) + + def load(self): + pass + + class lifespan_class: + should_exit = False + state: dict = {} + + def __init__(self, *a, **kw): + pass + + async def startup(self): + pass + + async def shutdown(self): + pass + + class _FakeServer: + should_exit = False + started = True + servers: list = [] + lifespan = None + + @staticmethod + def capture_signals(): + return contextlib.nullcontext() + + async def startup(self, sockets=None): + pass + + async def main_loop(self): + pass + + async def shutdown(self, sockets=None): + pass + + monkeypatch.setattr(uvicorn, "Config", _FakeConfig) + monkeypatch.setattr(uvicorn, "Server", lambda config: _FakeServer()) + return captured + + def test_start_server_enables_ws_ping_for_half_open_detection(monkeypatch): """WS ping must be configured so half-open connections (reverse-proxy 524, dropped tunnels) raise WebSocketDisconnect into the reaping path (#32377).""" - captured = {} - monkeypatch.setattr(uvicorn, "run", lambda *args, **kwargs: captured.update(kwargs)) + captured = _stub_uvicorn(monkeypatch) - # Loopback bind => no auth gate, so this reaches uvicorn.run without setup. + # Loopback bind => no auth gate, so this reaches the Config constructor. web_server.start_server(host="127.0.0.1", port=0, open_browser=False) assert captured["ws_ping_interval"] == 20.0