fix(desktop): hand off Windows bootstrap recovery (#45594)

This commit is contained in:
Teknium 2026-06-13 05:54:32 -07:00 committed by GitHub
parent 0333a99925
commit aa53a78d67
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 145 additions and 30 deletions

View file

@ -3,8 +3,9 @@
//! Driven when the installer is launched as `Hermes-Setup.exe --update` (see
//! `AppMode` in lib.rs). The desktop app hands off to us — it exits, then we:
//!
//! 1. wait for the old Hermes desktop process to fully exit (so the venv
//! shim is free; otherwise `hermes update` aborts with exit code 2),
//! 1. wait for the old Hermes desktop process to fully exit (so both the
//! venv shim and packaged app.asar are free; otherwise `hermes update`
//! or repair bootstrap can race locked files),
//! 2. run `hermes update --yes --gateway` (Python/repo update; this does NOT
//! rebuild apps/desktop by design — see cmd_update in hermes_cli/main.py),
//! 3. run `hermes desktop --build-only` (the rebuild step update skips),
@ -38,8 +39,8 @@ use crate::events::{BootstrapEvent, LogStream, StageInfo, StageState};
/// hermes_cli/main.py (sys.exit(2)). We surface a targeted message for this.
const UPDATE_EXIT_CONCURRENT: i32 = 2;
/// How long to wait for the old desktop process to release the venv shim
/// before giving up and letting `hermes update`'s own guard decide.
/// How long to wait for the old desktop process to release files under the
/// install tree before giving up and letting `hermes update`'s own guard decide.
const DESKTOP_EXIT_WAIT: Duration = Duration::from_secs(20);
const DESKTOP_EXIT_POLL: Duration = Duration::from_millis(500);
@ -150,8 +151,10 @@ async fn run_update(app: AppHandle) -> Result<()> {
// ---- pre-step: wait for the old desktop to die -----------------------
// The desktop exec'd us then called app.exit(), but process teardown is
// async on Windows. If it still holds the venv shim, `hermes update`
// aborts with exit 2. Give it a bounded window to clear.
wait_for_venv_free(&install_root, &app).await;
// aborts with exit 2. If it still holds the packaged app.asar,
// install.ps1's repair/re-clone path cannot move/remove the install tree.
// Give both handles a bounded window to clear.
wait_for_install_locks_free(&install_root, &app, "update").await;
// ---- stage 1: hermes update -----------------------------------------
// Pass --branch so `hermes update` targets the branch this installer was
@ -173,8 +176,8 @@ async fn run_update(app: AppHandle) -> Result<()> {
vec!["update".into(), "--yes".into(), "--gateway".into()];
// --force skips `hermes update`'s Windows running-exe guard (which would
// `sys.exit(2)` and dead-end the handoff). By contract the desktop has
// already exited and waited for the venv shim to unlock before launching
// us, and wait_for_venv_free below force-kills any straggler — so by the
// already exited and waited for the install locks to clear before launching
// us, and wait_for_install_locks_free below force-kills any straggler — so by the
// time `hermes update` runs there is no legitimate hermes.exe to protect,
// and the guard would only produce a false "Hermes is still running" stop.
update_args.push("--force".into());
@ -391,48 +394,57 @@ async fn run_update(app: AppHandle) -> Result<()> {
Ok(())
}
/// Poll until the venv shim is no longer locked (Windows) or a bounded timeout
/// elapses. On non-Windows this is a short fixed grace since file locking
/// isn't the failure mode there.
async fn wait_for_venv_free(install_root: &Path, app: &AppHandle) {
let shim = venv_hermes(install_root);
/// Poll until the venv shim AND packaged desktop app bundle are no longer locked
/// (Windows) or a bounded timeout elapses. On non-Windows this is a short fixed
/// grace since file locking isn't the failure mode there.
pub(crate) async fn wait_for_install_locks_free(install_root: &Path, app: &AppHandle, stage: &str) {
let lock_targets = install_lock_probe_paths(install_root);
let deadline = Instant::now() + DESKTOP_EXIT_WAIT;
emit_log(app, Some("update"), LogStream::Stdout, "[update] waiting for Hermes to exit…");
emit_log(app, Some(stage), LogStream::Stdout, "[handoff] waiting for Hermes to exit…");
loop {
if !is_locked(&shim) {
let locked = locked_paths(&lock_targets);
if locked.is_empty() {
return;
}
if Instant::now() >= deadline {
// Last resort: a backend hermes.exe (or a grandchild it spawned)
// is still holding the shim. The desktop should have reaped its
// tree before handing off, but SIGTERM races / detached
// grandchildren / AV handles can leave a straggler. Rather than
// "proceed anyway" straight into uv's "Access is denied", force-kill
// every hermes.exe except ourselves, then give the OS a beat to
// unload the image.
// Last resort: a backend hermes.exe (or the desktop Hermes.exe
// itself) is still holding one of the update-sensitive files. The
// desktop should have reaped its tree before handing off, but
// SIGTERM races / detached grandchildren / AV handles can leave a
// straggler. Rather than "proceed anyway" straight into uv's
// "Access is denied" or install.ps1's locked app.asar failure,
// force-kill every Hermes.exe except ourselves, then give the OS a
// beat to unload the image.
emit_log(
app,
Some("update"),
Some(stage),
LogStream::Stdout,
"[update] Hermes still holding the venv shim; force-killing stragglers…",
&format!(
"[handoff] Hermes still holding install files ({}); force-killing stragglers…",
format_locked_paths(&locked)
),
);
force_kill_other_hermes();
tokio::time::sleep(Duration::from_millis(800)).await;
if !is_locked(&shim) {
let locked_after_kill = locked_paths(&lock_targets);
if locked_after_kill.is_empty() {
emit_log(
app,
Some("update"),
Some(stage),
LogStream::Stdout,
"[update] venv shim freed after force-kill",
"[handoff] install files freed after force-kill",
);
} else {
emit_log(
app,
Some("update"),
Some(stage),
LogStream::Stdout,
"[update] venv shim still locked; proceeding (--force + quarantine will handle it)",
&format!(
"[handoff] install files still locked ({}); proceeding (--force + quarantine will handle it)",
format_locked_paths(&locked_after_kill)
),
);
}
return;
@ -441,13 +453,44 @@ async fn wait_for_venv_free(install_root: &Path, app: &AppHandle) {
}
}
fn install_lock_probe_paths(install_root: &Path) -> Vec<PathBuf> {
let mut paths = vec![venv_hermes(install_root)];
paths.extend(desktop_app_payload_paths(install_root));
paths
}
fn desktop_app_payload_paths(install_root: &Path) -> Vec<PathBuf> {
let release = install_root.join("apps").join("desktop").join("release");
if cfg!(target_os = "windows") {
vec![
release.join("win-unpacked").join("resources").join("app.asar"),
release.join("win-arm64-unpacked").join("resources").join("app.asar"),
]
} else if cfg!(target_os = "macos") {
vec![
release.join("mac").join("Hermes.app").join("Contents").join("Resources").join("app.asar"),
release.join("mac-arm64").join("Hermes.app").join("Contents").join("Resources").join("app.asar"),
]
} else {
vec![release.join("linux-unpacked").join("resources").join("app.asar")]
}
}
fn locked_paths(paths: &[PathBuf]) -> Vec<PathBuf> {
paths.iter().filter(|p| is_locked(p)).cloned().collect()
}
fn format_locked_paths(paths: &[PathBuf]) -> String {
paths.iter().map(|p| p.display().to_string()).collect::<Vec<_>>().join(", ")
}
/// Force-kill any `hermes.exe` other than this process. Windows-only; a no-op
/// elsewhere (POSIX has no mandatory-lock contention). We can't selectively
/// target "the backend" by PID here — the desktop already exited and we never
/// knew its children — so we kill the whole `hermes.exe` image tree via
/// taskkill, excluding our own PID.
///
/// Safe w.r.t. our own update child: this runs inside `wait_for_venv_free`,
/// Safe w.r.t. our own update child: this runs inside the install-lock wait,
/// which completes BEFORE we spawn `venv\Scripts\hermes.exe update`. At this
/// point no update-driven hermes.exe exists yet, so the only hermes.exe images
/// are stragglers from the old desktop — exactly what we want gone. (`/FI PID
@ -891,6 +934,29 @@ mod tests {
assert!(!is_locked(Path::new("/nonexistent/does/not/exist/xyz")));
}
#[test]
fn lock_probe_paths_include_desktop_app_payload() {
let root = Path::new("/x/hermes-agent");
let probes = install_lock_probe_paths(root);
assert!(
probes.iter().any(|p| p == &venv_hermes(root)),
"venv shim remains part of the update lock probe"
);
assert!(
probes.iter().any(|p| p.ends_with(Path::new("resources/app.asar"))),
"packaged app.asar must be probed so repair/re-clone waits for the old desktop to exit"
);
}
#[test]
fn locked_paths_ignores_missing_payloads() {
let root = Path::new("/nonexistent/hermes-agent");
let probes = install_lock_probe_paths(root);
assert!(locked_paths(&probes).is_empty());
}
#[test]
fn parses_update_branch_from_space_or_equals_args() {
assert_eq!(

View file

@ -1835,6 +1835,44 @@ async function applyUpdates(opts = {}) {
}
}
async function handOffWindowsBootstrapRecovery(reason) {
if (!IS_WINDOWS || !IS_PACKAGED) return false
const updater = resolveUpdaterBinary()
if (!updater) return false
const updateRoot = resolveUpdateRoot()
const { branch: configuredBranch } = readDesktopUpdateConfig()
const branch = directoryExists(path.join(updateRoot, '.git'))
? await resolveHealedBranch(updateRoot, configuredBranch || DEFAULT_UPDATE_BRANCH)
: configuredBranch || DEFAULT_UPDATE_BRANCH
const venvBin = path.join(updateRoot, 'venv', IS_WINDOWS ? 'Scripts' : 'bin')
const venvHermes = path.join(venvBin, IS_WINDOWS ? 'hermes.exe' : 'hermes')
const updaterArgs = fileExists(venvHermes) ? ['--update', '--branch', branch] : ['--repair', '--branch', branch]
await releaseBackendLockForUpdate(updateRoot)
const child = spawn(updater, updaterArgs, {
cwd: HERMES_HOME,
env: {
...process.env,
HERMES_HOME,
PATH: [path.join(HERMES_HOME, 'node', 'bin'), venvBin, process.env.PATH].filter(Boolean).join(path.delimiter)
},
detached: true,
stdio: 'ignore',
windowsHide: false
})
child.unref()
rememberLog(`[bootstrap] handed off ${reason} recovery to updater: ${updater} ${updaterArgs.join(' ')}; exiting desktop to release app.asar`)
setTimeout(() => {
app.quit()
}, 600)
return true
}
// Resolve the hermes CLI to drive an in-app update: prefer the venv shim in
// the install we're updating, fall back to `hermes` on PATH.
function resolveHermesCliBinary(updateRoot) {
@ -2432,6 +2470,14 @@ async function ensureRuntime(backend) {
if (backend.kind === 'bootstrap-needed') {
rememberLog('[bootstrap] no Hermes install found; starting first-launch bootstrap')
if (await handOffWindowsBootstrapRecovery('bootstrap-needed')) {
const handoffError = new Error('Hermes recovery was handed off to Hermes Setup. The desktop will restart when recovery completes.')
handoffError.isBootstrapFailure = true
handoffError.bootstrapHandedOff = true
bootstrapFailure = handoffError
throw handoffError
}
// Eagerly flip the bootstrap UI state to 'active' so the renderer
// shows the install overlay BEFORE the runner finishes fetching the
// manifest (which on slow networks can take tens of seconds and would

View file

@ -42,6 +42,9 @@ test('intentional or interactive desktop child processes stay documented', () =>
const source = readElectronFile('main.cjs')
assert.match(source, /windowsHide: false/)
assert.match(source, /handOffWindowsBootstrapRecovery/)
assert.match(source, /'--repair', '--branch'/)
assert.match(source, /'--update', '--branch'/)
assert.match(source, /nodePty\.spawn\(command, args/)
assert.match(source, /spawn\('cmd\.exe', \['\/c', 'start'/)
})