mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-26 11:12:03 +00:00
fix(desktop): prevent backend port-squat boot loop and pickPort self-collision
Two fixes to the Electron desktop launch path, with the port-reservation logic extracted into a unit-tested module:
1. hermes:bootstrap:reset ("Reload and retry") only cleared connectionPromise, leaving the live backend alive; the orphan kept binding PORT_FLOOR (9120) so the next startHermes() hit EADDRINUSE / "Object has been destroyed" and the window looped. Await teardownPrimaryBackendAndWait() so the reset stops the old backend before restarting.
2. pickPort() probes-then-closes a socket before the real bind happens in a separate Python child, so two concurrent spawns (primary + pool backend) could both be handed PORT_FLOOR and one died with EADDRINUSE. The reservation bookkeeping is extracted into electron/port-pool.cjs (PortPool): pickPort() reserves the chosen port until the child exits and releases it on every exit/error/throw-before-spawn path, closing the TOCTOU window.
PortPool is dependency-injected (probe passed in) and socket-free, unit-tested in electron/port-pool.test.cjs (8 cases) and wired into the test:desktop:platforms script.
(cherry picked from commit d4133945b9)
This commit is contained in:
parent
e3ed7722b5
commit
9ff0ba0827
4 changed files with 190 additions and 9 deletions
|
|
@ -30,6 +30,7 @@ const { buildSessionWindowUrl, createSessionWindowRegistry } = require('./sessio
|
|||
const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs')
|
||||
const { probeGatewayWebSocket } = require('./gateway-ws-probe.cjs')
|
||||
const { isForeignBackendToken, resolveServedDashboardToken } = require('./dashboard-token.cjs')
|
||||
const { PortPool } = require('./port-pool.cjs')
|
||||
const { serializeJsonBody, setJsonRequestHeaders } = require('./oauth-net-request.cjs')
|
||||
const { fetchMarketplaceThemes, searchMarketplaceThemes } = require('./vscode-marketplace.cjs')
|
||||
const { readDirForIpc } = require('./fs-read-dir.cjs')
|
||||
|
|
@ -108,6 +109,10 @@ if (USER_DATA_OVERRIDE) {
|
|||
|
||||
const PORT_FLOOR = 9120
|
||||
const PORT_CEILING = 9199
|
||||
// In-process port reservations that close the pickPort() TOCTOU window where
|
||||
// two concurrent backend spawns could be handed the same port. See
|
||||
// port-pool.cjs for the full rationale.
|
||||
const portPool = new PortPool(PORT_FLOOR, PORT_CEILING)
|
||||
const DEV_SERVER = process.env.HERMES_DESKTOP_DEV_SERVER
|
||||
const IS_PACKAGED = app.isPackaged
|
||||
const IS_MAC = process.platform === 'darwin'
|
||||
|
|
@ -2453,10 +2458,11 @@ function isPortAvailable(port) {
|
|||
}
|
||||
|
||||
async function pickPort() {
|
||||
for (let port = PORT_FLOOR; port <= PORT_CEILING; port += 1) {
|
||||
if (await isPortAvailable(port)) return port
|
||||
const port = await portPool.reserve(isPortAvailable)
|
||||
if (port === null) {
|
||||
throw new Error(`No free localhost port in ${PORT_FLOOR}-${PORT_CEILING}`)
|
||||
}
|
||||
throw new Error(`No free localhost port in ${PORT_FLOOR}-${PORT_CEILING}`)
|
||||
return port
|
||||
}
|
||||
|
||||
function fetchJson(url, token, options = {}) {
|
||||
|
|
@ -4540,9 +4546,20 @@ async function spawnPoolBackend(profile, entry) {
|
|||
// --profile wins over the inherited HERMES_HOME env (see _apply_profile_override
|
||||
// step 3 in hermes_cli/main.py), so the child re-homes to this profile.
|
||||
const dashboardArgs = ['--profile', profile, 'dashboard', '--no-open', '--host', '127.0.0.1', '--port', String(port)]
|
||||
const backend = await ensureRuntime(resolveHermesBackend(dashboardArgs))
|
||||
const hermesCwd = resolveHermesCwd()
|
||||
const webDist = resolveWebDist()
|
||||
let backend
|
||||
let hermesCwd
|
||||
let webDist
|
||||
try {
|
||||
backend = await ensureRuntime(resolveHermesBackend(dashboardArgs))
|
||||
hermesCwd = resolveHermesCwd()
|
||||
webDist = resolveWebDist()
|
||||
} catch (error) {
|
||||
// These run before the child exists / its exit handler is attached, so a
|
||||
// throw here would otherwise leak the reservation and slowly exhaust the
|
||||
// 9120-9199 range across switch cycles in one app session.
|
||||
portPool.release(port)
|
||||
throw error
|
||||
}
|
||||
|
||||
rememberLog(`Starting Hermes backend for profile "${profile}" via ${backend.label}`)
|
||||
|
||||
|
|
@ -4580,11 +4597,13 @@ async function spawnPoolBackend(profile, entry) {
|
|||
child.once('error', error => {
|
||||
rememberLog(`Hermes backend for profile "${profile}" failed to start: ${error.message}`)
|
||||
backendPool.delete(profile)
|
||||
portPool.release(port)
|
||||
rejectStart?.(error)
|
||||
})
|
||||
child.once('exit', (code, signal) => {
|
||||
rememberLog(`Hermes backend for profile "${profile}" exited (${signal || code})`)
|
||||
backendPool.delete(profile)
|
||||
portPool.release(port)
|
||||
if (!ready) {
|
||||
rejectStart?.(
|
||||
new Error(`Hermes backend for profile "${profile}" exited before it became ready (${signal || code}).`)
|
||||
|
|
@ -4633,6 +4652,7 @@ function stopPoolBackend(profile) {
|
|||
const entry = backendPool.get(profile)
|
||||
if (!entry) return
|
||||
backendPool.delete(profile)
|
||||
if (entry.port) portPool.release(entry.port)
|
||||
if (entry.process && !entry.process.killed) {
|
||||
try {
|
||||
entry.process.kill('SIGTERM')
|
||||
|
|
@ -4718,6 +4738,11 @@ async function startHermes() {
|
|||
}
|
||||
if (connectionPromise) return connectionPromise
|
||||
|
||||
// Hoisted so the outer .catch can release a port reserved by pickPort() when
|
||||
// a throw (e.g. ensureRuntime failing) happens before the child's exit
|
||||
// handler is attached. Stays null on the remote path (no port picked).
|
||||
let reservedPort = null
|
||||
|
||||
connectionPromise = (async () => {
|
||||
await advanceBootProgress('backend.resolve', 'Resolving Hermes backend', 8)
|
||||
// Resolve for the desktop's primary profile so a per-profile remote
|
||||
|
|
@ -4747,6 +4772,7 @@ async function startHermes() {
|
|||
|
||||
await advanceBootProgress('backend.port', 'Finding an open local port', 16)
|
||||
const port = await pickPort()
|
||||
reservedPort = port
|
||||
const token = crypto.randomBytes(32).toString('base64url')
|
||||
const dashboardArgs = ['dashboard', '--no-open', '--host', '127.0.0.1', '--port', String(port)]
|
||||
// Pin the desktop's chosen profile via the global --profile flag. This is
|
||||
|
|
@ -4811,6 +4837,7 @@ async function startHermes() {
|
|||
)
|
||||
hermesProcess = null
|
||||
connectionPromise = null
|
||||
portPool.release(port)
|
||||
sendBackendExit({ code: null, signal: null, error: error.message })
|
||||
rejectBackendStart?.(error)
|
||||
})
|
||||
|
|
@ -4818,6 +4845,7 @@ async function startHermes() {
|
|||
rememberLog(`Hermes backend exited (${signal || code})`)
|
||||
hermesProcess = null
|
||||
connectionPromise = null
|
||||
portPool.release(port)
|
||||
sendBackendExit({ code, signal })
|
||||
if (!backendReady) {
|
||||
const message = `Hermes backend exited before it became ready (${signal || code}).`
|
||||
|
|
@ -4846,11 +4874,13 @@ async function startHermes() {
|
|||
rememberLog(`[boot] could not read served dashboard token: ${error.message}`)
|
||||
return token
|
||||
})
|
||||
// The exit/error handlers null hermesProcess when the child dies, so a
|
||||
// null here already means "child dead".
|
||||
if (
|
||||
isForeignBackendToken({
|
||||
servedToken: authToken,
|
||||
spawnToken: token,
|
||||
childAlive: hermesProcess.exitCode === null && !hermesProcess.killed
|
||||
childAlive: hermesProcess !== null && hermesProcess.exitCode === null && !hermesProcess.killed
|
||||
})
|
||||
) {
|
||||
// Our child is dead and the port answers with someone else's token:
|
||||
|
|
@ -4890,6 +4920,7 @@ async function startHermes() {
|
|||
{ allowDecrease: true }
|
||||
)
|
||||
connectionPromise = null
|
||||
portPool.release(reservedPort)
|
||||
throw error
|
||||
})
|
||||
|
||||
|
|
@ -5164,8 +5195,8 @@ ipcMain.handle('hermes:bootstrap:reset', async () => {
|
|||
// reset connection state so the next startHermes() call restarts the
|
||||
// full backend flow (including a fresh runBootstrap pass).
|
||||
rememberLog('[bootstrap] reset requested by renderer; clearing latched failure')
|
||||
await teardownPrimaryBackendAndWait()
|
||||
bootstrapFailure = null
|
||||
connectionPromise = null
|
||||
bootstrapState = {
|
||||
active: false,
|
||||
manifest: null,
|
||||
|
|
|
|||
73
apps/desktop/electron/port-pool.cjs
Normal file
73
apps/desktop/electron/port-pool.cjs
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
'use strict'
|
||||
|
||||
/**
|
||||
* In-process port reservation pool for the desktop backend launcher.
|
||||
*
|
||||
* pickPort() probes a localhost port with a throwaway server and closes it
|
||||
* before the real bind happens in a separate Python child. Between that probe
|
||||
* and the child's bind there is a TOCTOU window: a second concurrent spawn
|
||||
* (the primary backend racing a pool backend) can be handed the SAME port, and
|
||||
* one then dies with EADDRINUSE ("address already in use" -> "Object has been
|
||||
* destroyed" boot loop). Reserving the chosen port in THIS process until the
|
||||
* child exits closes that window.
|
||||
*
|
||||
* The OS bind remains the source of truth; this only deconflicts racers inside
|
||||
* this process — it can't stop a foreign squatter, which the probe + the
|
||||
* EADDRINUSE self-heal still cover.
|
||||
*
|
||||
* The pool is dependency-injected (the availability probe is passed in) and
|
||||
* free of Electron/Node socket I/O, so it is unit-tested without real sockets
|
||||
* (see port-pool.test.cjs).
|
||||
*/
|
||||
class PortPool {
|
||||
/**
|
||||
* @param {number} floor inclusive lowest port to hand out
|
||||
* @param {number} ceiling inclusive highest port to hand out
|
||||
*/
|
||||
constructor(floor, ceiling) {
|
||||
this.floor = floor
|
||||
this.ceiling = ceiling
|
||||
this._reserved = new Set()
|
||||
}
|
||||
|
||||
/** @returns {boolean} whether `port` is currently reserved in-process. */
|
||||
has(port) {
|
||||
return this._reserved.has(port)
|
||||
}
|
||||
|
||||
/** Release a previously reserved port. No-op if it was not reserved. */
|
||||
release(port) {
|
||||
this._reserved.delete(port)
|
||||
}
|
||||
|
||||
/** Drop all reservations. */
|
||||
clear() {
|
||||
this._reserved.clear()
|
||||
}
|
||||
|
||||
/** @returns {number} count of currently reserved ports. */
|
||||
get size() {
|
||||
return this._reserved.size
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserve and return the lowest port in [floor, ceiling] that is neither
|
||||
* already reserved in-process nor rejected by `isAvailable(port)`, or null
|
||||
* if every port is taken. `isAvailable` may be sync (boolean) or async
|
||||
* (Promise<boolean>); it is awaited either way.
|
||||
*
|
||||
* @param {(port: number) => boolean | Promise<boolean>} isAvailable
|
||||
* @returns {Promise<number|null>}
|
||||
*/
|
||||
async reserve(isAvailable) {
|
||||
for (let port = this.floor; port <= this.ceiling; port += 1) {
|
||||
if (this._reserved.has(port)) continue
|
||||
if (!(await isAvailable(port))) continue
|
||||
this._reserved.add(port)
|
||||
return port
|
||||
}
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { PortPool }
|
||||
77
apps/desktop/electron/port-pool.test.cjs
Normal file
77
apps/desktop/electron/port-pool.test.cjs
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
/**
|
||||
* Tests for electron/port-pool.cjs.
|
||||
*
|
||||
* Run with: node --test electron/port-pool.test.cjs
|
||||
*
|
||||
* PortPool is the in-process reservation that closes the pickPort() TOCTOU
|
||||
* window. These cover selection order, skipping reserved/unavailable ports,
|
||||
* release/reuse, exhaustion, and async probes — without real sockets.
|
||||
*/
|
||||
|
||||
const test = require('node:test')
|
||||
const assert = require('node:assert/strict')
|
||||
|
||||
const { PortPool } = require('./port-pool.cjs')
|
||||
|
||||
const allFree = () => true
|
||||
|
||||
test('reserve returns the lowest free port and reserves it', async () => {
|
||||
const pool = new PortPool(9120, 9199)
|
||||
const port = await pool.reserve(allFree)
|
||||
assert.equal(port, 9120)
|
||||
assert.ok(pool.has(9120))
|
||||
assert.equal(pool.size, 1)
|
||||
})
|
||||
|
||||
test('reserve skips ports already reserved in-process', async () => {
|
||||
const pool = new PortPool(9120, 9199)
|
||||
const first = await pool.reserve(allFree)
|
||||
const second = await pool.reserve(allFree)
|
||||
assert.equal(first, 9120)
|
||||
assert.equal(second, 9121)
|
||||
})
|
||||
|
||||
test('reserve skips ports the probe rejects', async () => {
|
||||
const pool = new PortPool(9120, 9199)
|
||||
const busy = new Set([9120, 9121])
|
||||
const port = await pool.reserve(p => !busy.has(p))
|
||||
assert.equal(port, 9122)
|
||||
})
|
||||
|
||||
test('reserve returns null when every port is taken', async () => {
|
||||
const pool = new PortPool(9120, 9121)
|
||||
await pool.reserve(allFree)
|
||||
await pool.reserve(allFree)
|
||||
assert.equal(await pool.reserve(allFree), null)
|
||||
})
|
||||
|
||||
test('release frees a reserved port for reuse', async () => {
|
||||
const pool = new PortPool(9120, 9120)
|
||||
assert.equal(await pool.reserve(allFree), 9120)
|
||||
assert.equal(await pool.reserve(allFree), null) // exhausted
|
||||
pool.release(9120)
|
||||
assert.ok(!pool.has(9120))
|
||||
assert.equal(await pool.reserve(allFree), 9120) // reusable
|
||||
})
|
||||
|
||||
test('release is a no-op for an unreserved port', () => {
|
||||
const pool = new PortPool(9120, 9199)
|
||||
pool.release(9120)
|
||||
assert.equal(pool.size, 0)
|
||||
})
|
||||
|
||||
test('reserve awaits an async probe', async () => {
|
||||
const pool = new PortPool(9120, 9199)
|
||||
const busy = new Set([9120])
|
||||
const port = await pool.reserve(p => Promise.resolve(!busy.has(p)))
|
||||
assert.equal(port, 9121)
|
||||
})
|
||||
|
||||
test('clear drops all reservations', async () => {
|
||||
const pool = new PortPool(9120, 9199)
|
||||
await pool.reserve(allFree)
|
||||
await pool.reserve(allFree)
|
||||
assert.equal(pool.size, 2)
|
||||
pool.clear()
|
||||
assert.equal(pool.size, 0)
|
||||
})
|
||||
|
|
@ -35,7 +35,7 @@
|
|||
"test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
|
||||
"test:desktop:existing": "node scripts/test-desktop.mjs existing",
|
||||
"test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
|
||||
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs",
|
||||
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/port-pool.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs",
|
||||
"typecheck": "tsc -p . --noEmit",
|
||||
"lint": "eslint src/ electron/",
|
||||
"lint:fix": "eslint src/ electron/ --fix",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue