From 9ff0ba082739a74c54fc9bb26062be168a167391 Mon Sep 17 00:00:00 2001 From: "Mani Saint-Victor, MD" Date: Tue, 9 Jun 2026 17:03:55 -0400 Subject: [PATCH] fix(desktop): prevent backend port-squat boot loop and pickPort self-collision Two fixes to the Electron desktop launch path, with the port-reservation logic extracted into a unit-tested module: 1. hermes:bootstrap:reset ("Reload and retry") only cleared connectionPromise, leaving the live backend alive; the orphan kept binding PORT_FLOOR (9120) so the next startHermes() hit EADDRINUSE / "Object has been destroyed" and the window looped. Await teardownPrimaryBackendAndWait() so the reset stops the old backend before restarting. 2. pickPort() probes-then-closes a socket before the real bind happens in a separate Python child, so two concurrent spawns (primary + pool backend) could both be handed PORT_FLOOR and one died with EADDRINUSE. The reservation bookkeeping is extracted into electron/port-pool.cjs (PortPool): pickPort() reserves the chosen port until the child exits and releases it on every exit/error/throw-before-spawn path, closing the TOCTOU window. PortPool is dependency-injected (probe passed in) and socket-free, unit-tested in electron/port-pool.test.cjs (8 cases) and wired into the test:desktop:platforms script. (cherry picked from commit d4133945b91e1d25b2e3a506553a8f0e7a598a5a) --- apps/desktop/electron/main.cjs | 47 ++++++++++++--- apps/desktop/electron/port-pool.cjs | 73 ++++++++++++++++++++++ apps/desktop/electron/port-pool.test.cjs | 77 ++++++++++++++++++++++++ apps/desktop/package.json | 2 +- 4 files changed, 190 insertions(+), 9 deletions(-) create mode 100644 apps/desktop/electron/port-pool.cjs create mode 100644 apps/desktop/electron/port-pool.test.cjs diff --git a/apps/desktop/electron/main.cjs b/apps/desktop/electron/main.cjs index 4ab82a10e41..8975cec79a2 100644 --- a/apps/desktop/electron/main.cjs +++ b/apps/desktop/electron/main.cjs @@ -30,6 +30,7 @@ const { buildSessionWindowUrl, createSessionWindowRegistry } = require('./sessio const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs') const { probeGatewayWebSocket } = require('./gateway-ws-probe.cjs') const { isForeignBackendToken, resolveServedDashboardToken } = require('./dashboard-token.cjs') +const { PortPool } = require('./port-pool.cjs') const { serializeJsonBody, setJsonRequestHeaders } = require('./oauth-net-request.cjs') const { fetchMarketplaceThemes, searchMarketplaceThemes } = require('./vscode-marketplace.cjs') const { readDirForIpc } = require('./fs-read-dir.cjs') @@ -108,6 +109,10 @@ if (USER_DATA_OVERRIDE) { const PORT_FLOOR = 9120 const PORT_CEILING = 9199 +// In-process port reservations that close the pickPort() TOCTOU window where +// two concurrent backend spawns could be handed the same port. See +// port-pool.cjs for the full rationale. +const portPool = new PortPool(PORT_FLOOR, PORT_CEILING) const DEV_SERVER = process.env.HERMES_DESKTOP_DEV_SERVER const IS_PACKAGED = app.isPackaged const IS_MAC = process.platform === 'darwin' @@ -2453,10 +2458,11 @@ function isPortAvailable(port) { } async function pickPort() { - for (let port = PORT_FLOOR; port <= PORT_CEILING; port += 1) { - if (await isPortAvailable(port)) return port + const port = await portPool.reserve(isPortAvailable) + if (port === null) { + throw new Error(`No free localhost port in ${PORT_FLOOR}-${PORT_CEILING}`) } - throw new Error(`No free localhost port in ${PORT_FLOOR}-${PORT_CEILING}`) + return port } function fetchJson(url, token, options = {}) { @@ -4540,9 +4546,20 @@ async function spawnPoolBackend(profile, entry) { // --profile wins over the inherited HERMES_HOME env (see _apply_profile_override // step 3 in hermes_cli/main.py), so the child re-homes to this profile. const dashboardArgs = ['--profile', profile, 'dashboard', '--no-open', '--host', '127.0.0.1', '--port', String(port)] - const backend = await ensureRuntime(resolveHermesBackend(dashboardArgs)) - const hermesCwd = resolveHermesCwd() - const webDist = resolveWebDist() + let backend + let hermesCwd + let webDist + try { + backend = await ensureRuntime(resolveHermesBackend(dashboardArgs)) + hermesCwd = resolveHermesCwd() + webDist = resolveWebDist() + } catch (error) { + // These run before the child exists / its exit handler is attached, so a + // throw here would otherwise leak the reservation and slowly exhaust the + // 9120-9199 range across switch cycles in one app session. + portPool.release(port) + throw error + } rememberLog(`Starting Hermes backend for profile "${profile}" via ${backend.label}`) @@ -4580,11 +4597,13 @@ async function spawnPoolBackend(profile, entry) { child.once('error', error => { rememberLog(`Hermes backend for profile "${profile}" failed to start: ${error.message}`) backendPool.delete(profile) + portPool.release(port) rejectStart?.(error) }) child.once('exit', (code, signal) => { rememberLog(`Hermes backend for profile "${profile}" exited (${signal || code})`) backendPool.delete(profile) + portPool.release(port) if (!ready) { rejectStart?.( new Error(`Hermes backend for profile "${profile}" exited before it became ready (${signal || code}).`) @@ -4633,6 +4652,7 @@ function stopPoolBackend(profile) { const entry = backendPool.get(profile) if (!entry) return backendPool.delete(profile) + if (entry.port) portPool.release(entry.port) if (entry.process && !entry.process.killed) { try { entry.process.kill('SIGTERM') @@ -4718,6 +4738,11 @@ async function startHermes() { } if (connectionPromise) return connectionPromise + // Hoisted so the outer .catch can release a port reserved by pickPort() when + // a throw (e.g. ensureRuntime failing) happens before the child's exit + // handler is attached. Stays null on the remote path (no port picked). + let reservedPort = null + connectionPromise = (async () => { await advanceBootProgress('backend.resolve', 'Resolving Hermes backend', 8) // Resolve for the desktop's primary profile so a per-profile remote @@ -4747,6 +4772,7 @@ async function startHermes() { await advanceBootProgress('backend.port', 'Finding an open local port', 16) const port = await pickPort() + reservedPort = port const token = crypto.randomBytes(32).toString('base64url') const dashboardArgs = ['dashboard', '--no-open', '--host', '127.0.0.1', '--port', String(port)] // Pin the desktop's chosen profile via the global --profile flag. This is @@ -4811,6 +4837,7 @@ async function startHermes() { ) hermesProcess = null connectionPromise = null + portPool.release(port) sendBackendExit({ code: null, signal: null, error: error.message }) rejectBackendStart?.(error) }) @@ -4818,6 +4845,7 @@ async function startHermes() { rememberLog(`Hermes backend exited (${signal || code})`) hermesProcess = null connectionPromise = null + portPool.release(port) sendBackendExit({ code, signal }) if (!backendReady) { const message = `Hermes backend exited before it became ready (${signal || code}).` @@ -4846,11 +4874,13 @@ async function startHermes() { rememberLog(`[boot] could not read served dashboard token: ${error.message}`) return token }) + // The exit/error handlers null hermesProcess when the child dies, so a + // null here already means "child dead". if ( isForeignBackendToken({ servedToken: authToken, spawnToken: token, - childAlive: hermesProcess.exitCode === null && !hermesProcess.killed + childAlive: hermesProcess !== null && hermesProcess.exitCode === null && !hermesProcess.killed }) ) { // Our child is dead and the port answers with someone else's token: @@ -4890,6 +4920,7 @@ async function startHermes() { { allowDecrease: true } ) connectionPromise = null + portPool.release(reservedPort) throw error }) @@ -5164,8 +5195,8 @@ ipcMain.handle('hermes:bootstrap:reset', async () => { // reset connection state so the next startHermes() call restarts the // full backend flow (including a fresh runBootstrap pass). rememberLog('[bootstrap] reset requested by renderer; clearing latched failure') + await teardownPrimaryBackendAndWait() bootstrapFailure = null - connectionPromise = null bootstrapState = { active: false, manifest: null, diff --git a/apps/desktop/electron/port-pool.cjs b/apps/desktop/electron/port-pool.cjs new file mode 100644 index 00000000000..35131090814 --- /dev/null +++ b/apps/desktop/electron/port-pool.cjs @@ -0,0 +1,73 @@ +'use strict' + +/** + * In-process port reservation pool for the desktop backend launcher. + * + * pickPort() probes a localhost port with a throwaway server and closes it + * before the real bind happens in a separate Python child. Between that probe + * and the child's bind there is a TOCTOU window: a second concurrent spawn + * (the primary backend racing a pool backend) can be handed the SAME port, and + * one then dies with EADDRINUSE ("address already in use" -> "Object has been + * destroyed" boot loop). Reserving the chosen port in THIS process until the + * child exits closes that window. + * + * The OS bind remains the source of truth; this only deconflicts racers inside + * this process — it can't stop a foreign squatter, which the probe + the + * EADDRINUSE self-heal still cover. + * + * The pool is dependency-injected (the availability probe is passed in) and + * free of Electron/Node socket I/O, so it is unit-tested without real sockets + * (see port-pool.test.cjs). + */ +class PortPool { + /** + * @param {number} floor inclusive lowest port to hand out + * @param {number} ceiling inclusive highest port to hand out + */ + constructor(floor, ceiling) { + this.floor = floor + this.ceiling = ceiling + this._reserved = new Set() + } + + /** @returns {boolean} whether `port` is currently reserved in-process. */ + has(port) { + return this._reserved.has(port) + } + + /** Release a previously reserved port. No-op if it was not reserved. */ + release(port) { + this._reserved.delete(port) + } + + /** Drop all reservations. */ + clear() { + this._reserved.clear() + } + + /** @returns {number} count of currently reserved ports. */ + get size() { + return this._reserved.size + } + + /** + * Reserve and return the lowest port in [floor, ceiling] that is neither + * already reserved in-process nor rejected by `isAvailable(port)`, or null + * if every port is taken. `isAvailable` may be sync (boolean) or async + * (Promise); it is awaited either way. + * + * @param {(port: number) => boolean | Promise} isAvailable + * @returns {Promise} + */ + async reserve(isAvailable) { + for (let port = this.floor; port <= this.ceiling; port += 1) { + if (this._reserved.has(port)) continue + if (!(await isAvailable(port))) continue + this._reserved.add(port) + return port + } + return null + } +} + +module.exports = { PortPool } diff --git a/apps/desktop/electron/port-pool.test.cjs b/apps/desktop/electron/port-pool.test.cjs new file mode 100644 index 00000000000..f2600ce7d5f --- /dev/null +++ b/apps/desktop/electron/port-pool.test.cjs @@ -0,0 +1,77 @@ +/** + * Tests for electron/port-pool.cjs. + * + * Run with: node --test electron/port-pool.test.cjs + * + * PortPool is the in-process reservation that closes the pickPort() TOCTOU + * window. These cover selection order, skipping reserved/unavailable ports, + * release/reuse, exhaustion, and async probes — without real sockets. + */ + +const test = require('node:test') +const assert = require('node:assert/strict') + +const { PortPool } = require('./port-pool.cjs') + +const allFree = () => true + +test('reserve returns the lowest free port and reserves it', async () => { + const pool = new PortPool(9120, 9199) + const port = await pool.reserve(allFree) + assert.equal(port, 9120) + assert.ok(pool.has(9120)) + assert.equal(pool.size, 1) +}) + +test('reserve skips ports already reserved in-process', async () => { + const pool = new PortPool(9120, 9199) + const first = await pool.reserve(allFree) + const second = await pool.reserve(allFree) + assert.equal(first, 9120) + assert.equal(second, 9121) +}) + +test('reserve skips ports the probe rejects', async () => { + const pool = new PortPool(9120, 9199) + const busy = new Set([9120, 9121]) + const port = await pool.reserve(p => !busy.has(p)) + assert.equal(port, 9122) +}) + +test('reserve returns null when every port is taken', async () => { + const pool = new PortPool(9120, 9121) + await pool.reserve(allFree) + await pool.reserve(allFree) + assert.equal(await pool.reserve(allFree), null) +}) + +test('release frees a reserved port for reuse', async () => { + const pool = new PortPool(9120, 9120) + assert.equal(await pool.reserve(allFree), 9120) + assert.equal(await pool.reserve(allFree), null) // exhausted + pool.release(9120) + assert.ok(!pool.has(9120)) + assert.equal(await pool.reserve(allFree), 9120) // reusable +}) + +test('release is a no-op for an unreserved port', () => { + const pool = new PortPool(9120, 9199) + pool.release(9120) + assert.equal(pool.size, 0) +}) + +test('reserve awaits an async probe', async () => { + const pool = new PortPool(9120, 9199) + const busy = new Set([9120]) + const port = await pool.reserve(p => Promise.resolve(!busy.has(p))) + assert.equal(port, 9121) +}) + +test('clear drops all reservations', async () => { + const pool = new PortPool(9120, 9199) + await pool.reserve(allFree) + await pool.reserve(allFree) + assert.equal(pool.size, 2) + pool.clear() + assert.equal(pool.size, 0) +}) diff --git a/apps/desktop/package.json b/apps/desktop/package.json index 6d2e1b0ce1c..a552f950f20 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -35,7 +35,7 @@ "test:desktop:nsis": "node scripts/test-desktop.mjs nsis", "test:desktop:existing": "node scripts/test-desktop.mjs existing", "test:desktop:fresh": "node scripts/test-desktop.mjs fresh", - "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs", + "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/port-pool.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs", "typecheck": "tsc -p . --noEmit", "lint": "eslint src/ electron/", "lint:fix": "eslint src/ electron/ --fix",