From 146e77684b717e4c136fdf6de835d26c7b28c87b Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Sat, 6 Jun 2026 12:27:49 -0500 Subject: [PATCH] fix(desktop): bound desktop.log via cascade rotation + reclaim oversized logs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supersedes the single-.1 rotation from the prior commit, which only bounded FUTURE growth: rotating a pre-existing oversized desktop.log just renamed the monster to .1 (no disk reclaimed) and left it stranded until a second rotation cycle that a now-healthy app may never reach. The ~326 GB file that motivated this PR would therefore persist as desktop.log.1 after the user updated. Two changes bring desktop.log in line with the Python-side logs (hermes_logging.py RotatingFileHandler, maxBytes x backupCount): 1. Cascade rotation: live -> .1 -> .2 -> .3, dropping the oldest. Steady-state usage is bounded at ~(backupCount + 1) x cap regardless of loop intensity, instead of the old ~2x with a single backup. 2. Pathological-size discard: a live file past 4x the cap is a boot-loop artifact with no diagnostic value — delete it (along with every .1–.3 backup, whatever its size) outright instead of relocating the disk-exhaustion problem into a sibling. This is what lets an updated app self-heal a disk a stale build filled, on the very next launch, rather than one rotation cycle later. Behavior verified against a real filesystem in a temp dir: under cap -> no rotation; normal overflow -> live becomes .1; repeated overflow keeps exactly backupCount backups (no .4) with total bounded; a pathological live file plus poisoned backups are all reclaimed. node --check passes. 
Co-authored-by: The Garden --- apps/desktop/electron/main.cjs | 80 +++++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 21 deletions(-) diff --git a/apps/desktop/electron/main.cjs b/apps/desktop/electron/main.cjs index 2b906d59865..054b4e22454 100644 --- a/apps/desktop/electron/main.cjs +++ b/apps/desktop/electron/main.cjs @@ -247,16 +247,25 @@ const DEFAULT_UPDATE_BRANCH = 'main' const DESKTOP_LOG_PATH = path.join(HERMES_HOME, 'logs', 'desktop.log') const DESKTOP_LOG_FLUSH_MS = 120 const DESKTOP_LOG_BUFFER_MAX_CHARS = 64 * 1024 -// Cap desktop.log on disk. It is an append-only forensic log with no other -// rotation, so a boot loop (e.g. a version-skew crash where the backend exits -// instantly and the renderer keeps hitting Retry) appends the full bootstrap -// transcript on every attempt and can grow without bound — we have seen this -// file reach hundreds of GB and exhaust the disk, which then breaks update and -// install (no room for git/venv/npm temp files). Rotate to a single .1 sibling -// when the live file crosses the cap, so total on-disk usage stays ~2x the cap -// while preserving the most recent transcript for diagnostics. +// Bound desktop.log on disk. It is an append-only forensic log, so a boot loop +// (version-skew crash -> backend exits instantly -> renderer keeps hitting +// Retry) appends the full bootstrap transcript every attempt and grows without +// bound — we have seen it reach ~326 GB and exhaust the disk, which then breaks +// update/install (no room for git/venv/npm temp files). +// +// Mirror the Python logs (hermes_logging.py RotatingFileHandler, maxBytes x +// backupCount): cascade live -> .1 -> .2 -> .3, drop the oldest. Steady-state +// stays bounded at ~(backupCount + 1) x cap however hard the app loops. +// +// Bounding alone never RECLAIMS an already-huge file: a plain rotation just +// renames the monster to .1 and strands it for a cycle a healthy app may never +// reach. 
A multi-GB boot-loop transcript has no diagnostic value, so anything +// past the discard ceiling is deleted outright — the updated app self-heals a +// disk a stale build filled, on the next launch. const DESKTOP_LOG_MAX_BYTES = 10 * 1024 * 1024 -const DESKTOP_LOG_ROTATED_PATH = `${DESKTOP_LOG_PATH}.1` +const DESKTOP_LOG_BACKUP_COUNT = 3 +const DESKTOP_LOG_DISCARD_BYTES = DESKTOP_LOG_MAX_BYTES * 4 +const desktopLogBackupPath = n => `${DESKTOP_LOG_PATH}.${n}` const BOOT_FAKE_MODE = process.env.HERMES_DESKTOP_BOOT_FAKE === '1' const BOOT_FAKE_STEP_MS = (() => { const raw = Number.parseInt(String(process.env.HERMES_DESKTOP_BOOT_FAKE_STEP_MS || ''), 10) @@ -544,27 +553,56 @@ let bootProgressState = { timestamp: Date.now() } +// Pure planner: ordered fs ops to bound a live log of `size`. [] = nothing. +// Each step is ['rm', path] or ['mv', src, dst]; executed best-effort so a +// missing chain link never aborts the rest. +function planDesktopLogRotation(size) { + if (size < DESKTOP_LOG_MAX_BYTES) return [] + const backups = n => Array.from({ length: n }, (_, i) => desktopLogBackupPath(i + 1)) + // Pathological boot-loop log: reclaim live + every backup outright. + if (size > DESKTOP_LOG_DISCARD_BYTES) { + return [DESKTOP_LOG_PATH, ...backups(DESKTOP_LOG_BACKUP_COUNT)].map(p => ['rm', p]) + } + // Cascade: drop oldest, shift each up, live -> .1. 
+ const ops = [['rm', desktopLogBackupPath(DESKTOP_LOG_BACKUP_COUNT)]] + for (let i = DESKTOP_LOG_BACKUP_COUNT - 1; i >= 1; i--) { + ops.push(['mv', desktopLogBackupPath(i), desktopLogBackupPath(i + 1)]) + } + ops.push(['mv', DESKTOP_LOG_PATH, desktopLogBackupPath(1)]) + return ops +} + function rotateDesktopLogIfNeededSync() { + let size try { - const { size } = fs.statSync(DESKTOP_LOG_PATH) - if (size < DESKTOP_LOG_MAX_BYTES) return - fs.rmSync(DESKTOP_LOG_ROTATED_PATH, { force: true }) - fs.renameSync(DESKTOP_LOG_PATH, DESKTOP_LOG_ROTATED_PATH) + size = fs.statSync(DESKTOP_LOG_PATH).size } catch { - // No file yet (ENOENT) or rotation failed — appending will (re)create it. - // Logging must never block app startup/shutdown. + return // No live file yet — the append (re)creates it. + } + for (const [op, src, dst] of planDesktopLogRotation(size)) { + try { + if (op === 'rm') fs.rmSync(src, { force: true }) + else fs.renameSync(src, dst) + } catch { + // Best-effort — logging must never block startup/shutdown. + } } } async function rotateDesktopLogIfNeededAsync() { + let size try { - const { size } = await fs.promises.stat(DESKTOP_LOG_PATH) - if (size < DESKTOP_LOG_MAX_BYTES) return - await fs.promises.rm(DESKTOP_LOG_ROTATED_PATH, { force: true }) - await fs.promises.rename(DESKTOP_LOG_PATH, DESKTOP_LOG_ROTATED_PATH) + size = (await fs.promises.stat(DESKTOP_LOG_PATH)).size } catch { - // No file yet (ENOENT) or rotation failed — appending will (re)create it. - // Logging must never crash the desktop shell. + return // No live file yet — the append (re)creates it. + } + for (const [op, src, dst] of planDesktopLogRotation(size)) { + try { + if (op === 'rm') await fs.promises.rm(src, { force: true }) + else await fs.promises.rename(src, dst) + } catch { + // Best-effort — logging must never crash the shell. + } } }