From 56cf517ccd4c28cf0815a27403dee680dc36ca6e Mon Sep 17 00:00:00 2001 From: liuhao1024 Date: Fri, 26 Jun 2026 01:47:40 +0530 Subject: [PATCH] fix(cron): detect partial job loss in restore_cron_jobs_if_emptied (#52144) The desktop scheduler can overwrite cron/jobs.json with its own small set of internally-tracked crons after an update/restart, causing partial loss of tool-created cron jobs. The previous guard only checked for total loss (live_count == 0), missing the case where live_count > 0 but less than the pre-update snapshot count. Compare live_count against snap_count instead of checking for zero, so both total loss (0 vs N) and partial loss (1 vs 19) trigger restoration. Salvaged from #52161 by @liuhao1024. Closes #52144 --- hermes_cli/backup.py | 33 +++++++++++++++++++++------------ hermes_cli/main.py | 8 +++++--- tests/hermes_cli/test_backup.py | 26 ++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py index 702077f273a..f3601b6cc4d 100644 --- a/hermes_cli/backup.py +++ b/hermes_cli/backup.py @@ -1005,17 +1005,18 @@ def restore_cron_jobs_if_emptied( Config-version migrations have been observed to leave ``cron/jobs.json`` valid-but-empty after an update, silently dropping every scheduled job - (issue #34600). The existing malformed-shape guards in ``cron/jobs.py`` - don't catch this case because ``{"jobs": []}`` is perfectly valid JSON. + (issue #34600). The desktop scheduler can also overwrite the file with its + own small set of internally-tracked crons, causing partial loss (issue + #52144). This compares the *current* job count against the pre-update snapshot. If - the live file now has **zero** jobs while the snapshot captured **one or - more**, the snapshot copy of ``cron/jobs.json`` is restored in place. + the live file now has **fewer** jobs than the snapshot, the snapshot copy + of ``cron/jobs.json`` is restored in place. The check is deliberately conservative — it only ever restores when there - is unambiguous evidence of loss (snapshot had jobs, live file has none), - so a user who genuinely deleted all their jobs during/after the update is - never second-guessed, and an unreadable live file (count ``None``) is left + is unambiguous evidence of loss (snapshot had more jobs than live file), + so a user who genuinely deleted jobs during/after the update is never + second-guessed, and an unreadable live file (count ``None``) is left untouched so real corruption still surfaces. Args: @@ -1035,10 +1036,9 @@ def restore_cron_jobs_if_emptied( live_path = home / _CRON_JOBS_REL live_count = _count_cron_jobs(live_path) - # Only act when the live file is readable AND empty. ``None`` (missing or - # unparseable) is intentionally left alone — that's a different failure - # mode the user should see rather than have papered over. - if live_count is None or live_count > 0: + # ``None`` (missing or unparseable) is intentionally left alone — that's a + # different failure mode the user should see rather than have papered over. + if live_count is None: return None snap_path = _quick_snapshot_root(home) / snapshot_id / _CRON_JOBS_REL @@ -1046,6 +1046,13 @@ def restore_cron_jobs_if_emptied( if not snap_count: # None or 0 — nothing worth restoring return None + # Restore when live has FEWER jobs than the pre-update snapshot. + # Catches both total loss (0 vs N) and partial loss (1 vs 19) — the + # desktop scheduler can overwrite jobs.json with its own small set of + # internally-tracked crons after an update/restart. + if live_count >= snap_count: + return None + try: live_path.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(snap_path, live_path) @@ -1057,9 +1064,11 @@ def restore_cron_jobs_if_emptied( logger.warning( "Restored %d cron job(s) from pre-update snapshot %s " - "(cron/jobs.json was emptied during migration)", + "(live file had %d job(s), snapshot had %d — jobs were lost during migration)", snap_count, snapshot_id, + live_count, + snap_count, ) return {"restored": True, "job_count": snap_count, "snapshot_id": snapshot_id} diff --git a/hermes_cli/main.py b/hermes_cli/main.py index b4176f13e16..11451903d1e 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -9600,8 +9600,10 @@ def _cmd_update_impl(args, gateway_mode: bool): # Safety net: config-version migrations have been observed to leave # cron/jobs.json valid-but-empty, silently dropping every scheduled - # job (issue #34600). If the live file is now empty while the - # pre-update snapshot held jobs, restore it and warn loudly. + # job (issue #34600). The desktop scheduler can also overwrite with + # its own small set, causing partial loss (issue #52144). If the + # live file now has fewer jobs than the pre-update snapshot, restore + # it and warn loudly. try: from hermes_cli.backup import restore_cron_jobs_if_emptied @@ -9609,7 +9611,7 @@ def _cmd_update_impl(args, gateway_mode: bool): if cron_restore: print() print( - " ⚠️ cron/jobs.json was emptied during this update — " + " ⚠️ cron/jobs.json lost jobs during this update — " f"restored {cron_restore['job_count']} job(s) from " f"pre-update snapshot {cron_restore['snapshot_id']}." ) diff --git a/tests/hermes_cli/test_backup.py b/tests/hermes_cli/test_backup.py index cb55fec50ac..b57131f43ec 100644 --- a/tests/hermes_cli/test_backup.py +++ b/tests/hermes_cli/test_backup.py @@ -2098,6 +2098,32 @@ class TestRestoreCronJobsIfEmptied: result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home) assert result is None + def test_restores_when_partial_job_loss(self, tmp_path): + """Desktop scheduler overwrites jobs.json with its own small set, + losing tool-created crons while keeping desktop-tracked ones.""" + from hermes_cli.backup import restore_cron_jobs_if_emptied + hermes_home = tmp_path / ".hermes" + jobs_path = hermes_home / "cron" / "jobs.json" + # Pre-update: 19 jobs (18 tool-created + 1 desktop watchdog). + self._seed_jobs( + jobs_path, + [{"id": f"job-{i}"} for i in range(19)], + ) + snap_id = self._make_snapshot(hermes_home) + assert snap_id + + # Desktop scheduler overwrites with only its own 1 job. + jobs_path.write_text(json.dumps({"jobs": [{"id": "desktop-watchdog"}]})) + + result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home) + assert result is not None + assert result["restored"] is True + assert result["job_count"] == 19 + + # The live file now has all 19 jobs back. + restored = json.loads(jobs_path.read_text()) + assert len(restored["jobs"]) == 19 + def test_noop_when_snapshot_had_no_jobs(self, tmp_path): from hermes_cli.backup import restore_cron_jobs_if_emptied hermes_home = tmp_path / ".hermes"