mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-18 09:51:59 +00:00
fix(desktop): hand off Windows bootstrap recovery (#45594)
This commit is contained in:
parent
0333a99925
commit
aa53a78d67
3 changed files with 145 additions and 30 deletions
|
|
@ -3,8 +3,9 @@
|
|||
//! Driven when the installer is launched as `Hermes-Setup.exe --update` (see
|
||||
//! `AppMode` in lib.rs). The desktop app hands off to us — it exits, then we:
|
||||
//!
|
||||
//! 1. wait for the old Hermes desktop process to fully exit (so the venv
|
||||
//! shim is free; otherwise `hermes update` aborts with exit code 2),
|
||||
//! 1. wait for the old Hermes desktop process to fully exit (so both the
|
||||
//! venv shim and packaged app.asar are free; otherwise `hermes update`
|
||||
//! or repair bootstrap can race locked files),
|
||||
//! 2. run `hermes update --yes --gateway` (Python/repo update; this does NOT
|
||||
//! rebuild apps/desktop by design — see cmd_update in hermes_cli/main.py),
|
||||
//! 3. run `hermes desktop --build-only` (the rebuild step update skips),
|
||||
|
|
@ -38,8 +39,8 @@ use crate::events::{BootstrapEvent, LogStream, StageInfo, StageState};
|
|||
/// hermes_cli/main.py (sys.exit(2)). We surface a targeted message for this.
|
||||
const UPDATE_EXIT_CONCURRENT: i32 = 2;
|
||||
|
||||
/// How long to wait for the old desktop process to release the venv shim
|
||||
/// before giving up and letting `hermes update`'s own guard decide.
|
||||
/// How long to wait for the old desktop process to release files under the
|
||||
/// install tree before giving up and letting `hermes update`'s own guard decide.
|
||||
const DESKTOP_EXIT_WAIT: Duration = Duration::from_secs(20);
|
||||
const DESKTOP_EXIT_POLL: Duration = Duration::from_millis(500);
|
||||
|
||||
|
|
@ -150,8 +151,10 @@ async fn run_update(app: AppHandle) -> Result<()> {
|
|||
// ---- pre-step: wait for the old desktop to die -----------------------
|
||||
// The desktop exec'd us then called app.exit(), but process teardown is
|
||||
// async on Windows. If it still holds the venv shim, `hermes update`
|
||||
// aborts with exit 2. Give it a bounded window to clear.
|
||||
wait_for_venv_free(&install_root, &app).await;
|
||||
// aborts with exit 2. If it still holds the packaged app.asar,
|
||||
// install.ps1's repair/re-clone path cannot move/remove the install tree.
|
||||
// Give both handles a bounded window to clear.
|
||||
wait_for_install_locks_free(&install_root, &app, "update").await;
|
||||
|
||||
// ---- stage 1: hermes update -----------------------------------------
|
||||
// Pass --branch so `hermes update` targets the branch this installer was
|
||||
|
|
@ -173,8 +176,8 @@ async fn run_update(app: AppHandle) -> Result<()> {
|
|||
vec!["update".into(), "--yes".into(), "--gateway".into()];
|
||||
// --force skips `hermes update`'s Windows running-exe guard (which would
|
||||
// `sys.exit(2)` and dead-end the handoff). By contract the desktop has
|
||||
// already exited and waited for the venv shim to unlock before launching
|
||||
// us, and wait_for_venv_free below force-kills any straggler — so by the
|
||||
// already exited and waited for the install locks to clear before launching
|
||||
// us, and wait_for_install_locks_free below force-kills any straggler — so by the
|
||||
// time `hermes update` runs there is no legitimate hermes.exe to protect,
|
||||
// and the guard would only produce a false "Hermes is still running" stop.
|
||||
update_args.push("--force".into());
|
||||
|
|
@ -391,48 +394,57 @@ async fn run_update(app: AppHandle) -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Poll until the venv shim is no longer locked (Windows) or a bounded timeout
|
||||
/// elapses. On non-Windows this is a short fixed grace since file locking
|
||||
/// isn't the failure mode there.
|
||||
async fn wait_for_venv_free(install_root: &Path, app: &AppHandle) {
|
||||
let shim = venv_hermes(install_root);
|
||||
/// Poll until the venv shim AND packaged desktop app bundle are no longer locked
|
||||
/// (Windows) or a bounded timeout elapses. On non-Windows this is a short fixed
|
||||
/// grace since file locking isn't the failure mode there.
|
||||
pub(crate) async fn wait_for_install_locks_free(install_root: &Path, app: &AppHandle, stage: &str) {
|
||||
let lock_targets = install_lock_probe_paths(install_root);
|
||||
let deadline = Instant::now() + DESKTOP_EXIT_WAIT;
|
||||
|
||||
emit_log(app, Some("update"), LogStream::Stdout, "[update] waiting for Hermes to exit…");
|
||||
emit_log(app, Some(stage), LogStream::Stdout, "[handoff] waiting for Hermes to exit…");
|
||||
|
||||
loop {
|
||||
if !is_locked(&shim) {
|
||||
let locked = locked_paths(&lock_targets);
|
||||
if locked.is_empty() {
|
||||
return;
|
||||
}
|
||||
if Instant::now() >= deadline {
|
||||
// Last resort: a backend hermes.exe (or a grandchild it spawned)
|
||||
// is still holding the shim. The desktop should have reaped its
|
||||
// tree before handing off, but SIGTERM races / detached
|
||||
// grandchildren / AV handles can leave a straggler. Rather than
|
||||
// "proceed anyway" straight into uv's "Access is denied", force-kill
|
||||
// every hermes.exe except ourselves, then give the OS a beat to
|
||||
// unload the image.
|
||||
// Last resort: a backend hermes.exe (or the desktop Hermes.exe
|
||||
// itself) is still holding one of the update-sensitive files. The
|
||||
// desktop should have reaped its tree before handing off, but
|
||||
// SIGTERM races / detached grandchildren / AV handles can leave a
|
||||
// straggler. Rather than "proceed anyway" straight into uv's
|
||||
// "Access is denied" or install.ps1's locked app.asar failure,
|
||||
// force-kill every Hermes.exe except ourselves, then give the OS a
|
||||
// beat to unload the image.
|
||||
emit_log(
|
||||
app,
|
||||
Some("update"),
|
||||
Some(stage),
|
||||
LogStream::Stdout,
|
||||
"[update] Hermes still holding the venv shim; force-killing stragglers…",
|
||||
&format!(
|
||||
"[handoff] Hermes still holding install files ({}); force-killing stragglers…",
|
||||
format_locked_paths(&locked)
|
||||
),
|
||||
);
|
||||
force_kill_other_hermes();
|
||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||
if !is_locked(&shim) {
|
||||
let locked_after_kill = locked_paths(&lock_targets);
|
||||
if locked_after_kill.is_empty() {
|
||||
emit_log(
|
||||
app,
|
||||
Some("update"),
|
||||
Some(stage),
|
||||
LogStream::Stdout,
|
||||
"[update] venv shim freed after force-kill",
|
||||
"[handoff] install files freed after force-kill",
|
||||
);
|
||||
} else {
|
||||
emit_log(
|
||||
app,
|
||||
Some("update"),
|
||||
Some(stage),
|
||||
LogStream::Stdout,
|
||||
"[update] venv shim still locked; proceeding (--force + quarantine will handle it)",
|
||||
&format!(
|
||||
"[handoff] install files still locked ({}); proceeding (--force + quarantine will handle it)",
|
||||
format_locked_paths(&locked_after_kill)
|
||||
),
|
||||
);
|
||||
}
|
||||
return;
|
||||
|
|
@ -441,13 +453,44 @@ async fn wait_for_venv_free(install_root: &Path, app: &AppHandle) {
|
|||
}
|
||||
}
|
||||
|
||||
fn install_lock_probe_paths(install_root: &Path) -> Vec<PathBuf> {
|
||||
let mut paths = vec![venv_hermes(install_root)];
|
||||
paths.extend(desktop_app_payload_paths(install_root));
|
||||
paths
|
||||
}
|
||||
|
||||
fn desktop_app_payload_paths(install_root: &Path) -> Vec<PathBuf> {
|
||||
let release = install_root.join("apps").join("desktop").join("release");
|
||||
if cfg!(target_os = "windows") {
|
||||
vec![
|
||||
release.join("win-unpacked").join("resources").join("app.asar"),
|
||||
release.join("win-arm64-unpacked").join("resources").join("app.asar"),
|
||||
]
|
||||
} else if cfg!(target_os = "macos") {
|
||||
vec![
|
||||
release.join("mac").join("Hermes.app").join("Contents").join("Resources").join("app.asar"),
|
||||
release.join("mac-arm64").join("Hermes.app").join("Contents").join("Resources").join("app.asar"),
|
||||
]
|
||||
} else {
|
||||
vec![release.join("linux-unpacked").join("resources").join("app.asar")]
|
||||
}
|
||||
}
|
||||
|
||||
fn locked_paths(paths: &[PathBuf]) -> Vec<PathBuf> {
|
||||
paths.iter().filter(|p| is_locked(p)).cloned().collect()
|
||||
}
|
||||
|
||||
fn format_locked_paths(paths: &[PathBuf]) -> String {
|
||||
paths.iter().map(|p| p.display().to_string()).collect::<Vec<_>>().join(", ")
|
||||
}
|
||||
|
||||
/// Force-kill any `hermes.exe` other than this process. Windows-only; a no-op
|
||||
/// elsewhere (POSIX has no mandatory-lock contention). We can't selectively
|
||||
/// target "the backend" by PID here — the desktop already exited and we never
|
||||
/// knew its children — so we kill the whole `hermes.exe` image tree via
|
||||
/// taskkill, excluding our own PID.
|
||||
///
|
||||
/// Safe w.r.t. our own update child: this runs inside `wait_for_venv_free`,
|
||||
/// Safe w.r.t. our own update child: this runs inside the install-lock wait,
|
||||
/// which completes BEFORE we spawn `venv\Scripts\hermes.exe update`. At this
|
||||
/// point no update-driven hermes.exe exists yet, so the only hermes.exe images
|
||||
/// are stragglers from the old desktop — exactly what we want gone. (`/FI PID
|
||||
|
|
@ -891,6 +934,29 @@ mod tests {
|
|||
assert!(!is_locked(Path::new("/nonexistent/does/not/exist/xyz")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lock_probe_paths_include_desktop_app_payload() {
|
||||
let root = Path::new("/x/hermes-agent");
|
||||
let probes = install_lock_probe_paths(root);
|
||||
|
||||
assert!(
|
||||
probes.iter().any(|p| p == &venv_hermes(root)),
|
||||
"venv shim remains part of the update lock probe"
|
||||
);
|
||||
assert!(
|
||||
probes.iter().any(|p| p.ends_with(Path::new("resources/app.asar"))),
|
||||
"packaged app.asar must be probed so repair/re-clone waits for the old desktop to exit"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn locked_paths_ignores_missing_payloads() {
|
||||
let root = Path::new("/nonexistent/hermes-agent");
|
||||
let probes = install_lock_probe_paths(root);
|
||||
|
||||
assert!(locked_paths(&probes).is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_update_branch_from_space_or_equals_args() {
|
||||
assert_eq!(
|
||||
|
|
|
|||
|
|
@ -1835,6 +1835,44 @@ async function applyUpdates(opts = {}) {
|
|||
}
|
||||
}
|
||||
|
||||
async function handOffWindowsBootstrapRecovery(reason) {
|
||||
if (!IS_WINDOWS || !IS_PACKAGED) return false
|
||||
|
||||
const updater = resolveUpdaterBinary()
|
||||
if (!updater) return false
|
||||
|
||||
const updateRoot = resolveUpdateRoot()
|
||||
const { branch: configuredBranch } = readDesktopUpdateConfig()
|
||||
const branch = directoryExists(path.join(updateRoot, '.git'))
|
||||
? await resolveHealedBranch(updateRoot, configuredBranch || DEFAULT_UPDATE_BRANCH)
|
||||
: configuredBranch || DEFAULT_UPDATE_BRANCH
|
||||
const venvBin = path.join(updateRoot, 'venv', IS_WINDOWS ? 'Scripts' : 'bin')
|
||||
const venvHermes = path.join(venvBin, IS_WINDOWS ? 'hermes.exe' : 'hermes')
|
||||
const updaterArgs = fileExists(venvHermes) ? ['--update', '--branch', branch] : ['--repair', '--branch', branch]
|
||||
|
||||
await releaseBackendLockForUpdate(updateRoot)
|
||||
|
||||
const child = spawn(updater, updaterArgs, {
|
||||
cwd: HERMES_HOME,
|
||||
env: {
|
||||
...process.env,
|
||||
HERMES_HOME,
|
||||
PATH: [path.join(HERMES_HOME, 'node', 'bin'), venvBin, process.env.PATH].filter(Boolean).join(path.delimiter)
|
||||
},
|
||||
detached: true,
|
||||
stdio: 'ignore',
|
||||
windowsHide: false
|
||||
})
|
||||
child.unref()
|
||||
|
||||
rememberLog(`[bootstrap] handed off ${reason} recovery to updater: ${updater} ${updaterArgs.join(' ')}; exiting desktop to release app.asar`)
|
||||
setTimeout(() => {
|
||||
app.quit()
|
||||
}, 600)
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// Resolve the hermes CLI to drive an in-app update: prefer the venv shim in
|
||||
// the install we're updating, fall back to `hermes` on PATH.
|
||||
function resolveHermesCliBinary(updateRoot) {
|
||||
|
|
@ -2432,6 +2470,14 @@ async function ensureRuntime(backend) {
|
|||
if (backend.kind === 'bootstrap-needed') {
|
||||
rememberLog('[bootstrap] no Hermes install found; starting first-launch bootstrap')
|
||||
|
||||
if (await handOffWindowsBootstrapRecovery('bootstrap-needed')) {
|
||||
const handoffError = new Error('Hermes recovery was handed off to Hermes Setup. The desktop will restart when recovery completes.')
|
||||
handoffError.isBootstrapFailure = true
|
||||
handoffError.bootstrapHandedOff = true
|
||||
bootstrapFailure = handoffError
|
||||
throw handoffError
|
||||
}
|
||||
|
||||
// Eagerly flip the bootstrap UI state to 'active' so the renderer
|
||||
// shows the install overlay BEFORE the runner finishes fetching the
|
||||
// manifest (which on slow networks can take tens of seconds and would
|
||||
|
|
|
|||
|
|
@ -42,6 +42,9 @@ test('intentional or interactive desktop child processes stay documented', () =>
|
|||
const source = readElectronFile('main.cjs')
|
||||
|
||||
assert.match(source, /windowsHide: false/)
|
||||
assert.match(source, /handOffWindowsBootstrapRecovery/)
|
||||
assert.match(source, /'--repair', '--branch'/)
|
||||
assert.match(source, /'--update', '--branch'/)
|
||||
assert.match(source, /nodePty\.spawn\(command, args/)
|
||||
assert.match(source, /spawn\('cmd\.exe', \['\/c', 'start'/)
|
||||
})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue