fix(desktop): purge electron cache unconditionally, not via stdlib zipfile gate

The salvaged detector validated each cached electron-*.zip with
zipfile.testzip() and only purged ones it judged corrupt. But stdlib
zipfile reads from the end-of-central-directory backward, so it silently
tolerates prepended/concatenated junk — which is exactly the corruption
the bug report names ('86257938 extra bytes at beginning or within
zipfile', a partial download resumed into the same file). testzip()
returns clean on those zips, so the self-heal never fired for the
reported failure mode.

Drop the self-rolled validator: on any packaged-build failure, purge the
version's cached zips AND the half-written unpacked dir, then retry once.
@electron/get re-downloads with its own SHASUM verification — the real
source of truth, which catches prepend/concat/truncate alike. An
unrelated failure just costs one clean re-download and fails the same way.

Verified empirically: zipfile.testzip() returns None (clean) on a
prepended-junk zip; the unconditional purge removes it correctly.
This commit is contained in:
Teknium 2026-06-04 05:40:03 -07:00
parent f583c6ebd5
commit fef04a197e
2 changed files with 100 additions and 55 deletions

View file

@ -7272,45 +7272,62 @@ def _electron_download_cache_dirs() -> list[Path]:
return out
def _purge_corrupt_electron_cache() -> list[Path]:
"""Delete corrupt cached Electron zips so the next build re-downloads cleanly.
def _purge_electron_build_cache(desktop_dir: Path) -> list[Path]:
"""Clear the cached Electron download + half-written unpacked dir so the
next ``pack`` re-downloads and re-stages from scratch.
Root cause of the ``ENOENT rename '…/linux-unpacked/electron' ->
'…/linux-unpacked/Hermes'`` desktop build failure: a truncated/duplicated
download leaves a corrupt zip in the Electron cache. ``unpack-electron``
extracts a partial tree from it that is MISSING the ``electron`` binary, so
electron-builder dies on the final rename. The packed dir then looks plausible
(LICENSE, .pak files, chrome-sandbox) but has no launchable binary.
'…/linux-unpacked/Hermes'`` desktop build failure: a corrupt zip in the
per-user Electron download cache (a partial download resumed into the same
file leaves prepended/concatenated junk, or an interrupted write truncates
it). electron-builder's ``app-builder unpack-electron`` extracts the
distribution from that cached zip (NOT from node_modules); a bad zip yields
a partial tree MISSING the 193 MB ``electron`` binary, so the final rename
dies. Re-running repeats the same broken extraction forever.
Validates every ``electron-*.zip`` in the cache with the stdlib zip reader
(``testzip()`` does a full per-entry CRC check) and removes the bad ones.
Best-effort: never raises; returns the list of removed paths so the caller
can decide whether a retry is worthwhile.
We deliberately do NOT try to detect corruption ourselves. stdlib
``zipfile`` silently tolerates the prepended/concatenated junk that is the
most common corruption here it reads from the end-of-central-directory
backward, so ``testzip()`` returns clean on exactly the zips ``unzip -t``
and ``@electron/get`` reject. Gating the purge on a self-rolled validator
would therefore skip the real-world case and never self-heal. Instead, on a
packaged-build failure we unconditionally remove the version's cached zips
and the stale unpacked dir, then let the caller retry once: ``@electron/get``
re-downloads with its own SHASUM verification (the real source of truth),
and ``before-pack.cjs`` re-wipes the unpacked dir. If the failure was
unrelated, a clean re-download is harmless and the retry fails the same way.
Best-effort: never raises. Returns the paths removed so the caller can log
them and decide whether a retry is worthwhile (empty list nothing to
clear, so no point retrying).
"""
import zipfile
removed: list[Path] = []
for cache_dir in _electron_download_cache_dirs():
if not cache_dir.is_dir():
continue
for zip_path in sorted(cache_dir.rglob("electron-*.zip")):
corrupt = False
try:
with zipfile.ZipFile(zip_path) as zf:
# testzip() returns the first bad member, or None if every
# CRC checks out. A raise here means it isn't a readable zip.
corrupt = zf.testzip() is not None
except Exception:
corrupt = True
if not corrupt:
continue
try:
zip_path.unlink()
removed.append(zip_path)
except OSError:
# A locked/permission-denied cache entry is out of our hands;
# surface nothing and let the build report its own error.
# Locked/permission-denied entry is out of our hands; let the
# build report its own error rather than masking it.
pass
# Drop the half-written unpacked dir too: an interrupted prior pack leaves
# a partial tree that poisons the rename even after the zip is fixed.
# (before-pack.cjs also handles this, but clearing it here makes the retry
# robust even if the hook is somehow skipped.)
release_dir = desktop_dir / "release"
if release_dir.is_dir():
for unpacked in release_dir.glob("*-unpacked"):
try:
shutil.rmtree(unpacked, ignore_errors=True)
removed.append(unpacked)
except OSError:
pass
return removed
@ -7471,14 +7488,22 @@ def cmd_gui(args: argparse.Namespace):
build_script = "build" if source_mode else "pack"
build_result = subprocess.run([npm, "run", build_script], cwd=desktop_dir, env=env, check=False)
if build_result.returncode != 0 and not source_mode:
# A corrupt cached Electron zip makes `pack` fail with an ENOENT on
# the final `electron` -> `Hermes` rename: unpack-electron extracted
# a partial tree from the bad zip. Purge the bad cache entry and
# retry once so a poisoned download self-heals instead of wedging
# every future `hermes desktop` run.
purged = _purge_corrupt_electron_cache()
# A corrupt cached Electron zip makes `pack` fail with an ENOENT
# on the final `electron` -> `Hermes` rename: unpack-electron
# extracted a partial tree (missing the 193 MB binary) from the
# bad zip. We do NOT try to prove the zip is corrupt ourselves —
# stdlib zipfile silently tolerates the prepended/concatenated
# junk that is the most common corruption (a partial download
# resumed into the same file), so a `testzip()` gate would pass
# and never self-heal. Instead, on any packaged-build failure we
# purge the version's cached zip + the half-written unpacked dir
# and retry once: @electron/get re-downloads with its own SHASUM
# verification, which is the real source of truth. If the
# failure was something else, the clean re-download is harmless
# and the retry fails the same way.
purged = _purge_electron_build_cache(desktop_dir)
if purged:
print(f" ⚠ Detected corrupt cached Electron download ({len(purged)} file(s)); removed and retrying build...")
print(" ⚠ Desktop build failed; cleared cached Electron download and retrying once...")
for p in purged:
print(f" - {p}")
build_result = subprocess.run([npm, "run", build_script], cwd=desktop_dir, env=env, check=False)

View file

@ -413,6 +413,8 @@ def test_compute_desktop_content_hash_respects_gitignore(tmp_path, monkeypatch):
assert h1 != h3, "changing a tracked file should change the hash"
# ── Electron build-cache recovery tests ───────────────────────────────
def _write_zip(path: Path) -> None:
import zipfile
@ -422,37 +424,53 @@ def _write_zip(path: Path) -> None:
zf.writestr("electron", "fake binary payload")
def test_purge_corrupt_electron_cache_removes_only_bad_zips(tmp_path, monkeypatch):
def test_purge_electron_build_cache_clears_all_zips_and_unpacked_dir(tmp_path, monkeypatch):
"""Purge is unconditional: it removes every electron-*.zip (regardless of
whether stdlib zipfile thinks it's corrupt) plus the half-written unpacked
dir, because @electron/get's own SHASUM check on re-download is the real
validator not a self-rolled one."""
cache = tmp_path / "electron-cache"
good = cache / "electron-v40.9.3-linux-x64.zip"
bad = cache / "hashdir" / "electron-v40.9.3-linux-x64.zip"
_write_zip(good)
_write_zip(bad)
# Corrupt the second zip by truncating its central directory.
bad.write_bytes(bad.read_bytes()[:20])
# A "clean" zip and a prepended-junk zip — the latter is the real-world
# corruption that zipfile.testzip() silently passes (it reads from the
# end-of-central-directory backward), which is why we don't gate on it.
clean = cache / "electron-v40.9.3-linux-x64.zip"
prepended = cache / "hashdir" / "electron-v40.9.3-linux-x64.zip"
_write_zip(clean)
_write_zip(prepended)
prepended.write_bytes(b"\x00" * 4096 + prepended.read_bytes())
desktop_dir = tmp_path / "apps" / "desktop"
unpacked = desktop_dir / "release" / "linux-unpacked"
unpacked.mkdir(parents=True)
(unpacked / "LICENSE.electron.txt").write_text("x", encoding="utf-8")
(unpacked / "resources.pak").write_text("x", encoding="utf-8")
monkeypatch.setattr(cli_main, "_electron_download_cache_dirs", lambda: [cache])
removed = cli_main._purge_corrupt_electron_cache()
removed = cli_main._purge_electron_build_cache(desktop_dir)
assert removed == [bad]
assert good.exists()
assert not bad.exists()
assert clean in removed
assert prepended in removed
assert unpacked in removed
assert not clean.exists()
assert not prepended.exists()
assert not unpacked.exists()
def test_purge_corrupt_electron_cache_noop_when_all_valid(tmp_path, monkeypatch):
def test_purge_electron_build_cache_empty_when_nothing_present(tmp_path, monkeypatch):
"""No cached zips and no unpacked dir → nothing removed, so the caller
knows a retry is pointless."""
cache = tmp_path / "electron-cache"
good = cache / "electron-v40.9.3-linux-x64.zip"
_write_zip(good)
cache.mkdir()
desktop_dir = tmp_path / "apps" / "desktop"
monkeypatch.setattr(cli_main, "_electron_download_cache_dirs", lambda: [cache])
assert cli_main._purge_corrupt_electron_cache() == []
assert good.exists()
assert cli_main._purge_electron_build_cache(desktop_dir) == []
def test_gui_retries_pack_after_purging_corrupt_electron_cache(tmp_path, monkeypatch):
def test_gui_retries_pack_once_after_purging_build_cache(tmp_path, monkeypatch):
"""First pack fails, purge clears the cache, second pack succeeds, launch."""
root = _make_desktop_tree(tmp_path)
desktop_dir = root / "apps" / "desktop"
monkeypatch.setattr(cli_main, "PROJECT_ROOT", root)
packaged_exe = _make_packaged_executable(root, monkeypatch, platform="linux")
@ -464,21 +482,24 @@ def test_gui_retries_pack_after_purging_corrupt_electron_cache(tmp_path, monkeyp
with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
patch("hermes_cli.main._run_npm_install_deterministic", return_value=install_ok), \
patch("hermes_cli.main._desktop_macos_relaunchable_fixup"), \
patch("hermes_cli.main._purge_corrupt_electron_cache", return_value=[Path("/c/electron.zip")]) as mock_purge, \
patch("hermes_cli.main._desktop_linux_sandbox_fixup", return_value=True), \
patch("hermes_cli.main._write_desktop_build_stamp"), \
patch("hermes_cli.main._purge_electron_build_cache", return_value=[Path("/c/electron.zip")]) as mock_purge, \
patch("hermes_cli.main.subprocess.run", side_effect=[pack_fail, pack_ok, launch_ok]) as mock_run, \
pytest.raises(SystemExit) as exc:
cli_main.cmd_gui(_ns())
assert exc.value.code == 0
mock_purge.assert_called_once()
# First pack fails, purge runs, second pack succeeds, then launch.
# pack(fail) → purge → pack(ok) → launch = 3 subprocess.run calls
assert mock_run.call_count == 3
assert mock_run.call_args_list[0].args[0] == ["/usr/bin/npm", "run", "pack"]
assert mock_run.call_args_list[1].args[0] == ["/usr/bin/npm", "run", "pack"]
assert mock_run.call_args_list[2].args[0] == [str(packaged_exe)]
def test_gui_does_not_retry_when_cache_is_clean(tmp_path, monkeypatch, capsys):
def test_gui_does_not_retry_when_purge_finds_nothing(tmp_path, monkeypatch, capsys):
"""If the purge clears nothing, there's no point retrying — fail fast."""
root = _make_desktop_tree(tmp_path)
monkeypatch.setattr(cli_main, "PROJECT_ROOT", root)
_make_packaged_executable(root, monkeypatch, platform="linux")
@ -489,13 +510,12 @@ def test_gui_does_not_retry_when_cache_is_clean(tmp_path, monkeypatch, capsys):
with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
patch("hermes_cli.main._run_npm_install_deterministic", return_value=install_ok), \
patch("hermes_cli.main._desktop_macos_relaunchable_fixup"), \
patch("hermes_cli.main._purge_corrupt_electron_cache", return_value=[]) as mock_purge, \
patch("hermes_cli.main._purge_electron_build_cache", return_value=[]) as mock_purge, \
patch("hermes_cli.main.subprocess.run", side_effect=[pack_fail]) as mock_run, \
pytest.raises(SystemExit) as exc:
cli_main.cmd_gui(_ns())
assert exc.value.code == 1
# Purge was attempted but found nothing, so no retry pack runs.
mock_purge.assert_called_once()
assert mock_run.call_count == 1
assert "Desktop GUI build failed" in capsys.readouterr().out