fix(desktop): recover from corrupt cached Electron download on build

hermes desktop failed on Linux with an ENOENT renaming
release/linux-unpacked/electron -> Hermes. Root cause is a corrupt
cached Electron zip (~/.cache/electron/electron-*.zip): app-builder
unpack-electron extracts a partial tree from the bad zip that is
missing the electron binary, so electron-builder dies on the final
rename. Re-running repeats the broken extraction, leaving the desktop
app permanently unlaunchable until the cache is manually purged.

- Add _electron_download_cache_dirs() + _purge_corrupt_electron_cache()
  to hermes_cli/main.py: validate every electron-*.zip via
  zipfile.testzip() and delete corrupt ones; honor electron_config_cache
  / ELECTRON_CACHE overrides with per-OS defaults.
- Wire purge + single retry into cmd_gui packaged-build failure path so
  a poisoned download self-heals (electron re-downloads clean).
- Add beforePack hook (apps/desktop/scripts/before-pack.cjs) to wipe the
  target unpacked dir before staging, making packaging idempotent across
  interrupted runs. Cross-platform, best-effort.
- Tests: corrupt-zip detector, cmd_gui purge/retry/launch path,
  no-retry-when-clean path, and node --test for the cleanup helper.
This commit is contained in:
Harry Riddle 2026-06-03 00:43:07 +07:00 committed by Teknium
parent e003c53b06
commit f583c6ebd5
5 changed files with 311 additions and 0 deletions

View file

@ -146,6 +146,7 @@
"package.json"
],
"beforeBuild": "scripts/before-build.cjs",
"beforePack": "scripts/before-pack.cjs",
"afterPack": "scripts/after-pack.cjs",
"extraResources": [
{

View file

@ -0,0 +1,78 @@
'use strict'
/**
* before-pack.cjs electron-builder beforePack hook.
*
* Removes any stale unpacked app directory (`appOutDir`) before
* electron-builder stages the Electron binaries into it.
*
* WHY THIS EXISTS
* ---------------
* electron-builder's final packaging step copies the stock `electron`
* binary into `release/<platform>-unpacked/` and then renames it to the
* product name (`Hermes`). If a PREVIOUS `npm run pack` was interrupted
* (Ctrl-C, OOM kill, crash, full disk) the unpacked directory is left in a
* corrupted partial state: it keeps the already-renamed `LICENSE.electron.txt`
* and the Chromium payload (.pak/.so/icudtl.dat/chrome-sandbox) but is MISSING
* the `electron` binary itself.
*
* On the next run, electron-builder sees the destination directory already
* populated, skips re-copying the binary it thinks is present, then tries to
* rename a `electron` file that no longer exists. The build dies with:
*
* ENOENT: no such file or directory, rename
* '.../release/linux-unpacked/electron' -> '.../release/linux-unpacked/Hermes'
*
* This is a hard failure with no obvious cause for the user `hermes desktop`
* just prints "Desktop GUI build failed" and the only fix is to manually
* `rm -rf` the release directory, which a normal user has no way to know.
*
* The packaging step is not idempotent across an interrupted run, so we make
* it idempotent ourselves: wipe the target unpacked directory up front so
* electron-builder always stages into a clean tree. This is safe the
* directory is a pure build artifact that electron-builder fully recreates
* on every pack; nothing else depends on its prior contents.
*
* Cross-platform: the same partial-state trap exists on macOS
* (the mac-unpacked Hermes.app bundle) and Windows (win-unpacked), so we
* clean whatever `appOutDir` electron-builder hands us regardless of platform.
*
* Best-effort: a cleanup failure must never mask the real build. We log and
* resolve rather than throw worst case electron-builder hits the original
* ENOENT, which is no worse than not having this hook at all.
*
* electron-builder passes a context with:
* - appOutDir: the unpacked app directory about to be staged
* - electronPlatformName: 'win32' | 'darwin' | 'linux'
*/
const fs = require('node:fs')
function cleanStaleAppOutDir(appOutDir) {
if (!appOutDir || typeof appOutDir !== 'string') {
return false
}
if (!fs.existsSync(appOutDir)) {
return false
}
// Recursive + force so a half-written tree (read-only bits, partial files)
// can't block the wipe. retry/maxRetries rides out transient EBUSY on
// Windows where an AV/indexer may briefly hold a handle.
fs.rmSync(appOutDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 })
return true
}
exports.cleanStaleAppOutDir = cleanStaleAppOutDir
exports.default = async function beforePack(context) {
const appOutDir = context && context.appOutDir
try {
if (cleanStaleAppOutDir(appOutDir)) {
console.log(`[before-pack] removed stale unpacked dir before staging: ${appOutDir}`)
}
} catch (err) {
// Never fail the build over cleanup; surface why so a genuinely stuck
// directory (permissions, mount) is still diagnosable.
console.warn(`[before-pack] could not clean ${appOutDir} (${err.message}); continuing`)
}
}

View file

@ -0,0 +1,53 @@
const assert = require('node:assert/strict')
const fs = require('node:fs')
const os = require('node:os')
const path = require('node:path')
const test = require('node:test')
const { cleanStaleAppOutDir } = require('../scripts/before-pack.cjs')
test('cleanStaleAppOutDir removes a populated unpacked directory', () => {
const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-before-pack-'))
try {
const appOutDir = path.join(tempRoot, 'linux-unpacked')
fs.mkdirSync(appOutDir, { recursive: true })
// Reproduce the corrupted partial state: license + payload present,
// electron binary missing — exactly what trips the ENOENT rename.
fs.writeFileSync(path.join(appOutDir, 'LICENSE.electron.txt'), 'x', 'utf8')
fs.writeFileSync(path.join(appOutDir, 'resources.pak'), 'x', 'utf8')
fs.mkdirSync(path.join(appOutDir, 'resources'), { recursive: true })
fs.writeFileSync(path.join(appOutDir, 'resources', 'app.asar'), 'x', 'utf8')
const removed = cleanStaleAppOutDir(appOutDir)
assert.equal(removed, true)
assert.equal(fs.existsSync(appOutDir), false)
} finally {
fs.rmSync(tempRoot, { recursive: true, force: true })
}
})
test('cleanStaleAppOutDir is a no-op when the directory is absent', () => {
const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-before-pack-'))
try {
const missing = path.join(tempRoot, 'does-not-exist')
assert.equal(cleanStaleAppOutDir(missing), false)
} finally {
fs.rmSync(tempRoot, { recursive: true, force: true })
}
})
test('cleanStaleAppOutDir ignores empty or invalid input', () => {
assert.equal(cleanStaleAppOutDir(''), false)
assert.equal(cleanStaleAppOutDir(undefined), false)
assert.equal(cleanStaleAppOutDir(null), false)
assert.equal(cleanStaleAppOutDir(42), false)
})
test('beforePack default export resolves even when cleanup throws', async () => {
const { default: beforePack } = require('../scripts/before-pack.cjs')
// A directory path that rmSync can't remove is simulated by passing a
// context whose appOutDir is a file the hook will try (and be allowed) to
// remove; the contract under test is that the hook never rejects.
await assert.doesNotReject(beforePack({ appOutDir: '', electronPlatformName: 'linux' }))
})

View file

@ -7235,6 +7235,85 @@ def _desktop_packaged_executable(desktop_dir: Path) -> Optional[Path]:
return max(existing, key=lambda p: p.stat().st_mtime)
def _electron_download_cache_dirs() -> list[Path]:
"""Return the per-user Electron download cache directories for this OS.
electron-builder's ``app-builder unpack-electron`` extracts the Electron
distribution from a zip stored in this cache (NOT from node_modules), so a
corrupt zip here not a bad workspace install is what poisons the build.
Honors the ``electron_config_cache`` / ``ELECTRON_CACHE`` overrides that
``@electron/get`` respects, then falls back to the platform defaults.
"""
home = Path.home()
candidates: list[Path] = []
override = os.environ.get("electron_config_cache") or os.environ.get("ELECTRON_CACHE")
if override:
candidates.append(Path(override))
if sys.platform == "darwin":
candidates.append(home / "Library" / "Caches" / "electron")
elif sys.platform == "win32":
local = os.environ.get("LOCALAPPDATA")
if local:
candidates.append(Path(local) / "electron" / "Cache")
candidates.append(home / "AppData" / "Local" / "electron" / "Cache")
else:
xdg = os.environ.get("XDG_CACHE_HOME")
if xdg:
candidates.append(Path(xdg) / "electron")
candidates.append(home / ".cache" / "electron")
seen: set[Path] = set()
out: list[Path] = []
for c in candidates:
rc = c.expanduser()
if rc not in seen:
seen.add(rc)
out.append(rc)
return out
def _purge_corrupt_electron_cache() -> list[Path]:
"""Delete corrupt cached Electron zips so the next build re-downloads cleanly.
Root cause of the ``ENOENT rename '…/linux-unpacked/electron' ->
'…/linux-unpacked/Hermes'`` desktop build failure: a truncated/duplicated
download leaves a corrupt zip in the Electron cache. ``unpack-electron``
extracts a partial tree from it that is MISSING the ``electron`` binary, so
electron-builder dies on the final rename. The packed dir then looks plausible
(LICENSE, .pak files, chrome-sandbox) but has no launchable binary.
Validates every ``electron-*.zip`` in the cache with the stdlib zip reader
(``testzip()`` does a full per-entry CRC check) and removes the bad ones.
Best-effort: never raises; returns the list of removed paths so the caller
can decide whether a retry is worthwhile.
"""
import zipfile
removed: list[Path] = []
for cache_dir in _electron_download_cache_dirs():
if not cache_dir.is_dir():
continue
for zip_path in sorted(cache_dir.rglob("electron-*.zip")):
corrupt = False
try:
with zipfile.ZipFile(zip_path) as zf:
# testzip() returns the first bad member, or None if every
# CRC checks out. A raise here means it isn't a readable zip.
corrupt = zf.testzip() is not None
except Exception:
corrupt = True
if not corrupt:
continue
try:
zip_path.unlink()
removed.append(zip_path)
except OSError:
# A locked/permission-denied cache entry is out of our hands;
# surface nothing and let the build report its own error.
pass
return removed
def _desktop_macos_relaunchable_fixup(desktop_dir: Path) -> None:
"""Make a locally-built (unsigned) macOS desktop app survive in-place self-update.
@ -7391,6 +7470,18 @@ def cmd_gui(args: argparse.Namespace):
print(f"→ Building desktop {build_label}...")
build_script = "build" if source_mode else "pack"
build_result = subprocess.run([npm, "run", build_script], cwd=desktop_dir, env=env, check=False)
if build_result.returncode != 0 and not source_mode:
# A corrupt cached Electron zip makes `pack` fail with an ENOENT on
# the final `electron` -> `Hermes` rename: unpack-electron extracted
# a partial tree from the bad zip. Purge the bad cache entry and
# retry once so a poisoned download self-heals instead of wedging
# every future `hermes desktop` run.
purged = _purge_corrupt_electron_cache()
if purged:
print(f" ⚠ Detected corrupt cached Electron download ({len(purged)} file(s)); removed and retrying build...")
for p in purged:
print(f" - {p}")
build_result = subprocess.run([npm, "run", build_script], cwd=desktop_dir, env=env, check=False)
if build_result.returncode != 0:
print("✗ Desktop GUI build failed")
print(f" Run manually: cd apps/desktop && npm run {build_script}")

View file

@ -411,3 +411,91 @@ def test_compute_desktop_content_hash_respects_gitignore(tmp_path, monkeypatch):
cli_main._DESKTOP_STAMP_SPEC = None
h3 = cli_main._compute_desktop_content_hash(root)
assert h1 != h3, "changing a tracked file should change the hash"
def _write_zip(path: Path) -> None:
import zipfile
path.parent.mkdir(parents=True, exist_ok=True)
with zipfile.ZipFile(path, "w") as zf:
zf.writestr("electron", "fake binary payload")
def test_purge_corrupt_electron_cache_removes_only_bad_zips(tmp_path, monkeypatch):
cache = tmp_path / "electron-cache"
good = cache / "electron-v40.9.3-linux-x64.zip"
bad = cache / "hashdir" / "electron-v40.9.3-linux-x64.zip"
_write_zip(good)
_write_zip(bad)
# Corrupt the second zip by truncating its central directory.
bad.write_bytes(bad.read_bytes()[:20])
monkeypatch.setattr(cli_main, "_electron_download_cache_dirs", lambda: [cache])
removed = cli_main._purge_corrupt_electron_cache()
assert removed == [bad]
assert good.exists()
assert not bad.exists()
def test_purge_corrupt_electron_cache_noop_when_all_valid(tmp_path, monkeypatch):
cache = tmp_path / "electron-cache"
good = cache / "electron-v40.9.3-linux-x64.zip"
_write_zip(good)
monkeypatch.setattr(cli_main, "_electron_download_cache_dirs", lambda: [cache])
assert cli_main._purge_corrupt_electron_cache() == []
assert good.exists()
def test_gui_retries_pack_after_purging_corrupt_electron_cache(tmp_path, monkeypatch):
root = _make_desktop_tree(tmp_path)
desktop_dir = root / "apps" / "desktop"
monkeypatch.setattr(cli_main, "PROJECT_ROOT", root)
packaged_exe = _make_packaged_executable(root, monkeypatch, platform="linux")
install_ok = subprocess.CompletedProcess(["npm", "ci"], 0)
pack_fail = subprocess.CompletedProcess(["npm", "run", "pack"], 1)
pack_ok = subprocess.CompletedProcess(["npm", "run", "pack"], 0)
launch_ok = subprocess.CompletedProcess([str(packaged_exe)], 0)
with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
patch("hermes_cli.main._run_npm_install_deterministic", return_value=install_ok), \
patch("hermes_cli.main._desktop_macos_relaunchable_fixup"), \
patch("hermes_cli.main._purge_corrupt_electron_cache", return_value=[Path("/c/electron.zip")]) as mock_purge, \
patch("hermes_cli.main.subprocess.run", side_effect=[pack_fail, pack_ok, launch_ok]) as mock_run, \
pytest.raises(SystemExit) as exc:
cli_main.cmd_gui(_ns())
assert exc.value.code == 0
mock_purge.assert_called_once()
# First pack fails, purge runs, second pack succeeds, then launch.
assert mock_run.call_count == 3
assert mock_run.call_args_list[0].args[0] == ["/usr/bin/npm", "run", "pack"]
assert mock_run.call_args_list[1].args[0] == ["/usr/bin/npm", "run", "pack"]
assert mock_run.call_args_list[2].args[0] == [str(packaged_exe)]
def test_gui_does_not_retry_when_cache_is_clean(tmp_path, monkeypatch, capsys):
root = _make_desktop_tree(tmp_path)
monkeypatch.setattr(cli_main, "PROJECT_ROOT", root)
_make_packaged_executable(root, monkeypatch, platform="linux")
install_ok = subprocess.CompletedProcess(["npm", "ci"], 0)
pack_fail = subprocess.CompletedProcess(["npm", "run", "pack"], 1)
with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
patch("hermes_cli.main._run_npm_install_deterministic", return_value=install_ok), \
patch("hermes_cli.main._desktop_macos_relaunchable_fixup"), \
patch("hermes_cli.main._purge_corrupt_electron_cache", return_value=[]) as mock_purge, \
patch("hermes_cli.main.subprocess.run", side_effect=[pack_fail]) as mock_run, \
pytest.raises(SystemExit) as exc:
cli_main.cmd_gui(_ns())
assert exc.value.code == 1
# Purge was attempted but found nothing, so no retry pack runs.
mock_purge.assert_called_once()
assert mock_run.call_count == 1
assert "Desktop GUI build failed" in capsys.readouterr().out