mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-13 09:01:54 +00:00
fix(desktop): bound desktop.log via cascade rotation + reclaim oversized logs
Supersedes the single-.1 rotation from the prior commit, which only bounded FUTURE growth: rotating a pre-existing oversized desktop.log just renamed the monster to .1 (no disk reclaimed) and left it stranded until a second rotation cycle that a now-healthy app may never reach. The ~326 GB file that motivated this PR would therefore persist as desktop.log.1 after the user updated. Two changes bring desktop.log in line with the Python-side logs (hermes_logging.py RotatingFileHandler, maxBytes x backupCount): 1. Cascade rotation: live -> .1 -> .2 -> .3, dropping the oldest. Steady-state usage is bounded at ~(backupCount + 1) x cap regardless of loop intensity, instead of the old ~2x with a single backup. 2. Pathological-size discard: a file past 4x the cap is a boot-loop artifact with no diagnostic value — delete it (and any equally poisoned backups) outright instead of relocating the disk-exhaustion problem into a sibling. This is what lets an updated app self-heal a disk a stale build filled, on the very next launch, rather than one rotation cycle later. Behavior verified against a real filesystem in a temp dir: under cap -> no rotation; normal overflow -> live becomes .1; repeated overflow keeps exactly backupCount backups (no .4) with total bounded; a pathological live file plus poisoned backups are all reclaimed. node --check passes. Co-authored-by: The Garden <chilltulpa@gmail.com>
This commit is contained in:
parent
abbf050241
commit
146e77684b
1 changed files with 59 additions and 21 deletions
|
|
@ -247,16 +247,25 @@ const DEFAULT_UPDATE_BRANCH = 'main'
|
|||
const DESKTOP_LOG_PATH = path.join(HERMES_HOME, 'logs', 'desktop.log')
|
||||
const DESKTOP_LOG_FLUSH_MS = 120
|
||||
const DESKTOP_LOG_BUFFER_MAX_CHARS = 64 * 1024
|
||||
// Cap desktop.log on disk. It is an append-only forensic log with no other
|
||||
// rotation, so a boot loop (e.g. a version-skew crash where the backend exits
|
||||
// instantly and the renderer keeps hitting Retry) appends the full bootstrap
|
||||
// transcript on every attempt and can grow without bound — we have seen this
|
||||
// file reach hundreds of GB and exhaust the disk, which then breaks update and
|
||||
// install (no room for git/venv/npm temp files). Rotate to a single .1 sibling
|
||||
// when the live file crosses the cap, so total on-disk usage stays ~2x the cap
|
||||
// while preserving the most recent transcript for diagnostics.
|
||||
// Bound desktop.log on disk. It is an append-only forensic log, so a boot loop
|
||||
// (version-skew crash -> backend exits instantly -> renderer keeps hitting
|
||||
// Retry) appends the full bootstrap transcript every attempt and grows without
|
||||
// bound — we have seen it reach ~326 GB and exhaust the disk, which then breaks
|
||||
// update/install (no room for git/venv/npm temp files).
|
||||
//
|
||||
// Mirror the Python logs (hermes_logging.py RotatingFileHandler, maxBytes x
|
||||
// backupCount): cascade live -> .1 -> .2 -> .3, drop the oldest. Steady-state
|
||||
// stays bounded at ~(backupCount + 1) x cap however hard the app loops.
|
||||
//
|
||||
// Bounding alone never RECLAIMS an already-huge file: a plain rotation just
|
||||
// renames the monster to .1 and strands it for a cycle a healthy app may never
|
||||
// reach. A multi-GB boot-loop transcript has no diagnostic value, so anything
|
||||
// past the discard ceiling is deleted outright — the updated app self-heals a
|
||||
// disk a stale build filled, on the next launch.
|
||||
const DESKTOP_LOG_MAX_BYTES = 10 * 1024 * 1024
|
||||
const DESKTOP_LOG_ROTATED_PATH = `${DESKTOP_LOG_PATH}.1`
|
||||
const DESKTOP_LOG_BACKUP_COUNT = 3
|
||||
const DESKTOP_LOG_DISCARD_BYTES = DESKTOP_LOG_MAX_BYTES * 4
|
||||
const desktopLogBackupPath = n => `${DESKTOP_LOG_PATH}.${n}`
|
||||
const BOOT_FAKE_MODE = process.env.HERMES_DESKTOP_BOOT_FAKE === '1'
|
||||
const BOOT_FAKE_STEP_MS = (() => {
|
||||
const raw = Number.parseInt(String(process.env.HERMES_DESKTOP_BOOT_FAKE_STEP_MS || ''), 10)
|
||||
|
|
@ -544,27 +553,56 @@ let bootProgressState = {
|
|||
timestamp: Date.now()
|
||||
}
|
||||
|
||||
// Pure planner: ordered fs ops to bound a live log of `size`. [] = nothing.
|
||||
// Each step is ['rm', path] or ['mv', src, dst]; executed best-effort so a
|
||||
// missing chain link never aborts the rest.
|
||||
function planDesktopLogRotation(size) {
|
||||
if (size < DESKTOP_LOG_MAX_BYTES) return []
|
||||
const backups = n => Array.from({ length: n }, (_, i) => desktopLogBackupPath(i + 1))
|
||||
// Pathological boot-loop log: reclaim live + every backup outright.
|
||||
if (size > DESKTOP_LOG_DISCARD_BYTES) {
|
||||
return [DESKTOP_LOG_PATH, ...backups(DESKTOP_LOG_BACKUP_COUNT)].map(p => ['rm', p])
|
||||
}
|
||||
// Cascade: drop oldest, shift each up, live -> .1.
|
||||
const ops = [['rm', desktopLogBackupPath(DESKTOP_LOG_BACKUP_COUNT)]]
|
||||
for (let i = DESKTOP_LOG_BACKUP_COUNT - 1; i >= 1; i--) {
|
||||
ops.push(['mv', desktopLogBackupPath(i), desktopLogBackupPath(i + 1)])
|
||||
}
|
||||
ops.push(['mv', DESKTOP_LOG_PATH, desktopLogBackupPath(1)])
|
||||
return ops
|
||||
}
|
||||
|
||||
function rotateDesktopLogIfNeededSync() {
|
||||
let size
|
||||
try {
|
||||
const { size } = fs.statSync(DESKTOP_LOG_PATH)
|
||||
if (size < DESKTOP_LOG_MAX_BYTES) return
|
||||
fs.rmSync(DESKTOP_LOG_ROTATED_PATH, { force: true })
|
||||
fs.renameSync(DESKTOP_LOG_PATH, DESKTOP_LOG_ROTATED_PATH)
|
||||
size = fs.statSync(DESKTOP_LOG_PATH).size
|
||||
} catch {
|
||||
// No file yet (ENOENT) or rotation failed — appending will (re)create it.
|
||||
// Logging must never block app startup/shutdown.
|
||||
return // No live file yet — the append (re)creates it.
|
||||
}
|
||||
for (const [op, src, dst] of planDesktopLogRotation(size)) {
|
||||
try {
|
||||
if (op === 'rm') fs.rmSync(src, { force: true })
|
||||
else fs.renameSync(src, dst)
|
||||
} catch {
|
||||
// Best-effort — logging must never block startup/shutdown.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function rotateDesktopLogIfNeededAsync() {
|
||||
let size
|
||||
try {
|
||||
const { size } = await fs.promises.stat(DESKTOP_LOG_PATH)
|
||||
if (size < DESKTOP_LOG_MAX_BYTES) return
|
||||
await fs.promises.rm(DESKTOP_LOG_ROTATED_PATH, { force: true })
|
||||
await fs.promises.rename(DESKTOP_LOG_PATH, DESKTOP_LOG_ROTATED_PATH)
|
||||
size = (await fs.promises.stat(DESKTOP_LOG_PATH)).size
|
||||
} catch {
|
||||
// No file yet (ENOENT) or rotation failed — appending will (re)create it.
|
||||
// Logging must never crash the desktop shell.
|
||||
return // No live file yet — the append (re)creates it.
|
||||
}
|
||||
for (const [op, src, dst] of planDesktopLogRotation(size)) {
|
||||
try {
|
||||
if (op === 'rm') await fs.promises.rm(src, { force: true })
|
||||
else await fs.promises.rename(src, dst)
|
||||
} catch {
|
||||
// Best-effort — logging must never crash the shell.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue