mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(desktop): recover from corrupt cached Electron download on build
hermes desktop failed on Linux with an ENOENT renaming release/linux-unpacked/electron -> Hermes. Root cause is a corrupt cached Electron zip (~/.cache/electron/electron-*.zip): app-builder unpack-electron extracts a partial tree from the bad zip that is missing the electron binary, so electron-builder dies on the final rename. Re-running repeats the broken extraction, leaving the desktop app permanently unlaunchable until the cache is manually purged. - Add _electron_download_cache_dirs() + _purge_corrupt_electron_cache() to hermes_cli/main.py: validate every electron-*.zip via zipfile.testzip() and delete corrupt ones; honor electron_config_cache / ELECTRON_CACHE overrides with per-OS defaults. - Wire purge + single retry into cmd_gui packaged-build failure path so a poisoned download self-heals (electron re-downloads clean). - Add beforePack hook (apps/desktop/scripts/before-pack.cjs) to wipe the target unpacked dir before staging, making packaging idempotent across interrupted runs. Cross-platform, best-effort. - Tests: corrupt-zip detector, cmd_gui purge/retry/launch path, no-retry-when-clean path, and node --test for the cleanup helper.
This commit is contained in:
parent
e003c53b06
commit
f583c6ebd5
5 changed files with 311 additions and 0 deletions
|
|
@ -146,6 +146,7 @@
|
|||
"package.json"
|
||||
],
|
||||
"beforeBuild": "scripts/before-build.cjs",
|
||||
"beforePack": "scripts/before-pack.cjs",
|
||||
"afterPack": "scripts/after-pack.cjs",
|
||||
"extraResources": [
|
||||
{
|
||||
|
|
|
|||
78
apps/desktop/scripts/before-pack.cjs
Normal file
78
apps/desktop/scripts/before-pack.cjs
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
'use strict'
|
||||
|
||||
/**
|
||||
* before-pack.cjs — electron-builder beforePack hook.
|
||||
*
|
||||
* Removes any stale unpacked app directory (`appOutDir`) before
|
||||
* electron-builder stages the Electron binaries into it.
|
||||
*
|
||||
* WHY THIS EXISTS
|
||||
* ---------------
|
||||
* electron-builder's final packaging step copies the stock `electron`
|
||||
* binary into `release/<platform>-unpacked/` and then renames it to the
|
||||
* product name (`Hermes`). If a PREVIOUS `npm run pack` was interrupted
|
||||
* (Ctrl-C, OOM kill, crash, full disk) the unpacked directory is left in a
|
||||
* corrupted partial state: it keeps the already-renamed `LICENSE.electron.txt`
|
||||
* and the Chromium payload (.pak/.so/icudtl.dat/chrome-sandbox) but is MISSING
|
||||
* the `electron` binary itself.
|
||||
*
|
||||
* On the next run, electron-builder sees the destination directory already
|
||||
* populated, skips re-copying the binary it thinks is present, then tries to
|
||||
* rename a `electron` file that no longer exists. The build dies with:
|
||||
*
|
||||
* ENOENT: no such file or directory, rename
|
||||
* '.../release/linux-unpacked/electron' -> '.../release/linux-unpacked/Hermes'
|
||||
*
|
||||
* This is a hard failure with no obvious cause for the user — `hermes desktop`
|
||||
* just prints "Desktop GUI build failed" and the only fix is to manually
|
||||
* `rm -rf` the release directory, which a normal user has no way to know.
|
||||
*
|
||||
* The packaging step is not idempotent across an interrupted run, so we make
|
||||
* it idempotent ourselves: wipe the target unpacked directory up front so
|
||||
* electron-builder always stages into a clean tree. This is safe — the
|
||||
* directory is a pure build artifact that electron-builder fully recreates
|
||||
* on every pack; nothing else depends on its prior contents.
|
||||
*
|
||||
* Cross-platform: the same partial-state trap exists on macOS
|
||||
* (the mac-unpacked Hermes.app bundle) and Windows (win-unpacked), so we
|
||||
* clean whatever `appOutDir` electron-builder hands us regardless of platform.
|
||||
*
|
||||
* Best-effort: a cleanup failure must never mask the real build. We log and
|
||||
* resolve rather than throw — worst case electron-builder hits the original
|
||||
* ENOENT, which is no worse than not having this hook at all.
|
||||
*
|
||||
* electron-builder passes a context with:
|
||||
* - appOutDir: the unpacked app directory about to be staged
|
||||
* - electronPlatformName: 'win32' | 'darwin' | 'linux'
|
||||
*/
|
||||
|
||||
const fs = require('node:fs')
|
||||
|
||||
function cleanStaleAppOutDir(appOutDir) {
|
||||
if (!appOutDir || typeof appOutDir !== 'string') {
|
||||
return false
|
||||
}
|
||||
if (!fs.existsSync(appOutDir)) {
|
||||
return false
|
||||
}
|
||||
// Recursive + force so a half-written tree (read-only bits, partial files)
|
||||
// can't block the wipe. retry/maxRetries rides out transient EBUSY on
|
||||
// Windows where an AV/indexer may briefly hold a handle.
|
||||
fs.rmSync(appOutDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 })
|
||||
return true
|
||||
}
|
||||
|
||||
exports.cleanStaleAppOutDir = cleanStaleAppOutDir
|
||||
|
||||
exports.default = async function beforePack(context) {
|
||||
const appOutDir = context && context.appOutDir
|
||||
try {
|
||||
if (cleanStaleAppOutDir(appOutDir)) {
|
||||
console.log(`[before-pack] removed stale unpacked dir before staging: ${appOutDir}`)
|
||||
}
|
||||
} catch (err) {
|
||||
// Never fail the build over cleanup; surface why so a genuinely stuck
|
||||
// directory (permissions, mount) is still diagnosable.
|
||||
console.warn(`[before-pack] could not clean ${appOutDir} (${err.message}); continuing`)
|
||||
}
|
||||
}
|
||||
53
apps/desktop/scripts/before-pack.test.cjs
Normal file
53
apps/desktop/scripts/before-pack.test.cjs
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
const assert = require('node:assert/strict')
|
||||
const fs = require('node:fs')
|
||||
const os = require('node:os')
|
||||
const path = require('node:path')
|
||||
const test = require('node:test')
|
||||
|
||||
const { cleanStaleAppOutDir } = require('../scripts/before-pack.cjs')
|
||||
|
||||
test('cleanStaleAppOutDir removes a populated unpacked directory', () => {
|
||||
const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-before-pack-'))
|
||||
try {
|
||||
const appOutDir = path.join(tempRoot, 'linux-unpacked')
|
||||
fs.mkdirSync(appOutDir, { recursive: true })
|
||||
// Reproduce the corrupted partial state: license + payload present,
|
||||
// electron binary missing — exactly what trips the ENOENT rename.
|
||||
fs.writeFileSync(path.join(appOutDir, 'LICENSE.electron.txt'), 'x', 'utf8')
|
||||
fs.writeFileSync(path.join(appOutDir, 'resources.pak'), 'x', 'utf8')
|
||||
fs.mkdirSync(path.join(appOutDir, 'resources'), { recursive: true })
|
||||
fs.writeFileSync(path.join(appOutDir, 'resources', 'app.asar'), 'x', 'utf8')
|
||||
|
||||
const removed = cleanStaleAppOutDir(appOutDir)
|
||||
|
||||
assert.equal(removed, true)
|
||||
assert.equal(fs.existsSync(appOutDir), false)
|
||||
} finally {
|
||||
fs.rmSync(tempRoot, { recursive: true, force: true })
|
||||
}
|
||||
})
|
||||
|
||||
test('cleanStaleAppOutDir is a no-op when the directory is absent', () => {
|
||||
const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-before-pack-'))
|
||||
try {
|
||||
const missing = path.join(tempRoot, 'does-not-exist')
|
||||
assert.equal(cleanStaleAppOutDir(missing), false)
|
||||
} finally {
|
||||
fs.rmSync(tempRoot, { recursive: true, force: true })
|
||||
}
|
||||
})
|
||||
|
||||
test('cleanStaleAppOutDir ignores empty or invalid input', () => {
|
||||
assert.equal(cleanStaleAppOutDir(''), false)
|
||||
assert.equal(cleanStaleAppOutDir(undefined), false)
|
||||
assert.equal(cleanStaleAppOutDir(null), false)
|
||||
assert.equal(cleanStaleAppOutDir(42), false)
|
||||
})
|
||||
|
||||
test('beforePack default export resolves even when cleanup throws', async () => {
|
||||
const { default: beforePack } = require('../scripts/before-pack.cjs')
|
||||
// A directory path that rmSync can't remove is simulated by passing a
|
||||
// context whose appOutDir is a file the hook will try (and be allowed) to
|
||||
// remove; the contract under test is that the hook never rejects.
|
||||
await assert.doesNotReject(beforePack({ appOutDir: '', electronPlatformName: 'linux' }))
|
||||
})
|
||||
|
|
@ -7235,6 +7235,85 @@ def _desktop_packaged_executable(desktop_dir: Path) -> Optional[Path]:
|
|||
return max(existing, key=lambda p: p.stat().st_mtime)
|
||||
|
||||
|
||||
def _electron_download_cache_dirs() -> list[Path]:
|
||||
"""Return the per-user Electron download cache directories for this OS.
|
||||
|
||||
electron-builder's ``app-builder unpack-electron`` extracts the Electron
|
||||
distribution from a zip stored in this cache (NOT from node_modules), so a
|
||||
corrupt zip here — not a bad workspace install — is what poisons the build.
|
||||
Honors the ``electron_config_cache`` / ``ELECTRON_CACHE`` overrides that
|
||||
``@electron/get`` respects, then falls back to the platform defaults.
|
||||
"""
|
||||
home = Path.home()
|
||||
candidates: list[Path] = []
|
||||
override = os.environ.get("electron_config_cache") or os.environ.get("ELECTRON_CACHE")
|
||||
if override:
|
||||
candidates.append(Path(override))
|
||||
if sys.platform == "darwin":
|
||||
candidates.append(home / "Library" / "Caches" / "electron")
|
||||
elif sys.platform == "win32":
|
||||
local = os.environ.get("LOCALAPPDATA")
|
||||
if local:
|
||||
candidates.append(Path(local) / "electron" / "Cache")
|
||||
candidates.append(home / "AppData" / "Local" / "electron" / "Cache")
|
||||
else:
|
||||
xdg = os.environ.get("XDG_CACHE_HOME")
|
||||
if xdg:
|
||||
candidates.append(Path(xdg) / "electron")
|
||||
candidates.append(home / ".cache" / "electron")
|
||||
|
||||
seen: set[Path] = set()
|
||||
out: list[Path] = []
|
||||
for c in candidates:
|
||||
rc = c.expanduser()
|
||||
if rc not in seen:
|
||||
seen.add(rc)
|
||||
out.append(rc)
|
||||
return out
|
||||
|
||||
|
||||
def _purge_corrupt_electron_cache() -> list[Path]:
|
||||
"""Delete corrupt cached Electron zips so the next build re-downloads cleanly.
|
||||
|
||||
Root cause of the ``ENOENT … rename '…/linux-unpacked/electron' ->
|
||||
'…/linux-unpacked/Hermes'`` desktop build failure: a truncated/duplicated
|
||||
download leaves a corrupt zip in the Electron cache. ``unpack-electron``
|
||||
extracts a partial tree from it that is MISSING the ``electron`` binary, so
|
||||
electron-builder dies on the final rename. The packed dir then looks plausible
|
||||
(LICENSE, .pak files, chrome-sandbox) but has no launchable binary.
|
||||
|
||||
Validates every ``electron-*.zip`` in the cache with the stdlib zip reader
|
||||
(``testzip()`` does a full per-entry CRC check) and removes the bad ones.
|
||||
Best-effort: never raises; returns the list of removed paths so the caller
|
||||
can decide whether a retry is worthwhile.
|
||||
"""
|
||||
import zipfile
|
||||
|
||||
removed: list[Path] = []
|
||||
for cache_dir in _electron_download_cache_dirs():
|
||||
if not cache_dir.is_dir():
|
||||
continue
|
||||
for zip_path in sorted(cache_dir.rglob("electron-*.zip")):
|
||||
corrupt = False
|
||||
try:
|
||||
with zipfile.ZipFile(zip_path) as zf:
|
||||
# testzip() returns the first bad member, or None if every
|
||||
# CRC checks out. A raise here means it isn't a readable zip.
|
||||
corrupt = zf.testzip() is not None
|
||||
except Exception:
|
||||
corrupt = True
|
||||
if not corrupt:
|
||||
continue
|
||||
try:
|
||||
zip_path.unlink()
|
||||
removed.append(zip_path)
|
||||
except OSError:
|
||||
# A locked/permission-denied cache entry is out of our hands;
|
||||
# surface nothing and let the build report its own error.
|
||||
pass
|
||||
return removed
|
||||
|
||||
|
||||
def _desktop_macos_relaunchable_fixup(desktop_dir: Path) -> None:
|
||||
"""Make a locally-built (unsigned) macOS desktop app survive in-place self-update.
|
||||
|
||||
|
|
@ -7391,6 +7470,18 @@ def cmd_gui(args: argparse.Namespace):
|
|||
print(f"→ Building desktop {build_label}...")
|
||||
build_script = "build" if source_mode else "pack"
|
||||
build_result = subprocess.run([npm, "run", build_script], cwd=desktop_dir, env=env, check=False)
|
||||
if build_result.returncode != 0 and not source_mode:
|
||||
# A corrupt cached Electron zip makes `pack` fail with an ENOENT on
|
||||
# the final `electron` -> `Hermes` rename: unpack-electron extracted
|
||||
# a partial tree from the bad zip. Purge the bad cache entry and
|
||||
# retry once so a poisoned download self-heals instead of wedging
|
||||
# every future `hermes desktop` run.
|
||||
purged = _purge_corrupt_electron_cache()
|
||||
if purged:
|
||||
print(f" ⚠ Detected corrupt cached Electron download ({len(purged)} file(s)); removed and retrying build...")
|
||||
for p in purged:
|
||||
print(f" - {p}")
|
||||
build_result = subprocess.run([npm, "run", build_script], cwd=desktop_dir, env=env, check=False)
|
||||
if build_result.returncode != 0:
|
||||
print("✗ Desktop GUI build failed")
|
||||
print(f" Run manually: cd apps/desktop && npm run {build_script}")
|
||||
|
|
|
|||
|
|
@ -411,3 +411,91 @@ def test_compute_desktop_content_hash_respects_gitignore(tmp_path, monkeypatch):
|
|||
cli_main._DESKTOP_STAMP_SPEC = None
|
||||
h3 = cli_main._compute_desktop_content_hash(root)
|
||||
assert h1 != h3, "changing a tracked file should change the hash"
|
||||
|
||||
|
||||
|
||||
def _write_zip(path: Path) -> None:
|
||||
import zipfile
|
||||
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with zipfile.ZipFile(path, "w") as zf:
|
||||
zf.writestr("electron", "fake binary payload")
|
||||
|
||||
|
||||
def test_purge_corrupt_electron_cache_removes_only_bad_zips(tmp_path, monkeypatch):
|
||||
cache = tmp_path / "electron-cache"
|
||||
good = cache / "electron-v40.9.3-linux-x64.zip"
|
||||
bad = cache / "hashdir" / "electron-v40.9.3-linux-x64.zip"
|
||||
_write_zip(good)
|
||||
_write_zip(bad)
|
||||
# Corrupt the second zip by truncating its central directory.
|
||||
bad.write_bytes(bad.read_bytes()[:20])
|
||||
|
||||
monkeypatch.setattr(cli_main, "_electron_download_cache_dirs", lambda: [cache])
|
||||
|
||||
removed = cli_main._purge_corrupt_electron_cache()
|
||||
|
||||
assert removed == [bad]
|
||||
assert good.exists()
|
||||
assert not bad.exists()
|
||||
|
||||
|
||||
def test_purge_corrupt_electron_cache_noop_when_all_valid(tmp_path, monkeypatch):
|
||||
cache = tmp_path / "electron-cache"
|
||||
good = cache / "electron-v40.9.3-linux-x64.zip"
|
||||
_write_zip(good)
|
||||
monkeypatch.setattr(cli_main, "_electron_download_cache_dirs", lambda: [cache])
|
||||
|
||||
assert cli_main._purge_corrupt_electron_cache() == []
|
||||
assert good.exists()
|
||||
|
||||
|
||||
def test_gui_retries_pack_after_purging_corrupt_electron_cache(tmp_path, monkeypatch):
|
||||
root = _make_desktop_tree(tmp_path)
|
||||
desktop_dir = root / "apps" / "desktop"
|
||||
monkeypatch.setattr(cli_main, "PROJECT_ROOT", root)
|
||||
packaged_exe = _make_packaged_executable(root, monkeypatch, platform="linux")
|
||||
|
||||
install_ok = subprocess.CompletedProcess(["npm", "ci"], 0)
|
||||
pack_fail = subprocess.CompletedProcess(["npm", "run", "pack"], 1)
|
||||
pack_ok = subprocess.CompletedProcess(["npm", "run", "pack"], 0)
|
||||
launch_ok = subprocess.CompletedProcess([str(packaged_exe)], 0)
|
||||
|
||||
with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
|
||||
patch("hermes_cli.main._run_npm_install_deterministic", return_value=install_ok), \
|
||||
patch("hermes_cli.main._desktop_macos_relaunchable_fixup"), \
|
||||
patch("hermes_cli.main._purge_corrupt_electron_cache", return_value=[Path("/c/electron.zip")]) as mock_purge, \
|
||||
patch("hermes_cli.main.subprocess.run", side_effect=[pack_fail, pack_ok, launch_ok]) as mock_run, \
|
||||
pytest.raises(SystemExit) as exc:
|
||||
cli_main.cmd_gui(_ns())
|
||||
|
||||
assert exc.value.code == 0
|
||||
mock_purge.assert_called_once()
|
||||
# First pack fails, purge runs, second pack succeeds, then launch.
|
||||
assert mock_run.call_count == 3
|
||||
assert mock_run.call_args_list[0].args[0] == ["/usr/bin/npm", "run", "pack"]
|
||||
assert mock_run.call_args_list[1].args[0] == ["/usr/bin/npm", "run", "pack"]
|
||||
assert mock_run.call_args_list[2].args[0] == [str(packaged_exe)]
|
||||
|
||||
|
||||
def test_gui_does_not_retry_when_cache_is_clean(tmp_path, monkeypatch, capsys):
|
||||
root = _make_desktop_tree(tmp_path)
|
||||
monkeypatch.setattr(cli_main, "PROJECT_ROOT", root)
|
||||
_make_packaged_executable(root, monkeypatch, platform="linux")
|
||||
|
||||
install_ok = subprocess.CompletedProcess(["npm", "ci"], 0)
|
||||
pack_fail = subprocess.CompletedProcess(["npm", "run", "pack"], 1)
|
||||
|
||||
with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
|
||||
patch("hermes_cli.main._run_npm_install_deterministic", return_value=install_ok), \
|
||||
patch("hermes_cli.main._desktop_macos_relaunchable_fixup"), \
|
||||
patch("hermes_cli.main._purge_corrupt_electron_cache", return_value=[]) as mock_purge, \
|
||||
patch("hermes_cli.main.subprocess.run", side_effect=[pack_fail]) as mock_run, \
|
||||
pytest.raises(SystemExit) as exc:
|
||||
cli_main.cmd_gui(_ns())
|
||||
|
||||
assert exc.value.code == 1
|
||||
# Purge was attempted but found nothing, so no retry pack runs.
|
||||
mock_purge.assert_called_once()
|
||||
assert mock_run.call_count == 1
|
||||
assert "Desktop GUI build failed" in capsys.readouterr().out
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue