fix(cron): detect partial job loss in restore_cron_jobs_if_emptied (#52144)

The desktop scheduler can overwrite cron/jobs.json with its own small
set of internally-tracked crons after an update/restart, causing
partial loss of tool-created cron jobs. The previous guard only
checked for total loss (live_count == 0), missing the case where
live_count > 0 but less than the pre-update snapshot count.

Compare live_count against snap_count instead of checking for zero,
so both total loss (0 vs N) and partial loss (1 vs 19) trigger
restoration.

Salvaged from #52161 by @liuhao1024.

Closes #52144
This commit is contained in:
liuhao1024 2026-06-26 01:47:40 +05:30 committed by Teknium
parent 6b639bc2b9
commit 56cf517ccd
3 changed files with 52 additions and 15 deletions

View file

@ -1005,17 +1005,18 @@ def restore_cron_jobs_if_emptied(
Config-version migrations have been observed to leave ``cron/jobs.json``
valid-but-empty after an update, silently dropping every scheduled job
(issue #34600). The existing malformed-shape guards in ``cron/jobs.py``
don't catch this case because ``{"jobs": []}`` is perfectly valid JSON.
(issue #34600). The desktop scheduler can also overwrite the file with its
own small set of internally-tracked crons, causing partial loss (issue
#52144).
This compares the *current* job count against the pre-update snapshot. If
the live file now has **zero** jobs while the snapshot captured **one or
more**, the snapshot copy of ``cron/jobs.json`` is restored in place.
the live file now has **fewer** jobs than the snapshot, the snapshot copy
of ``cron/jobs.json`` is restored in place.
The check is deliberately conservative it only ever restores when there
is unambiguous evidence of loss (snapshot had jobs, live file has none),
so a user who genuinely deleted all their jobs during/after the update is
never second-guessed, and an unreadable live file (count ``None``) is left
is unambiguous evidence of loss (snapshot had more jobs than live file),
so a user who genuinely deleted jobs during/after the update is never
second-guessed, and an unreadable live file (count ``None``) is left
untouched so real corruption still surfaces.
Args:
@ -1035,10 +1036,9 @@ def restore_cron_jobs_if_emptied(
live_path = home / _CRON_JOBS_REL
live_count = _count_cron_jobs(live_path)
# Only act when the live file is readable AND empty. ``None`` (missing or
# unparseable) is intentionally left alone — that's a different failure
# mode the user should see rather than have papered over.
if live_count is None or live_count > 0:
# ``None`` (missing or unparseable) is intentionally left alone — that's a
# different failure mode the user should see rather than have papered over.
if live_count is None:
return None
snap_path = _quick_snapshot_root(home) / snapshot_id / _CRON_JOBS_REL
@ -1046,6 +1046,13 @@ def restore_cron_jobs_if_emptied(
if not snap_count: # None or 0 — nothing worth restoring
return None
# Restore when live has FEWER jobs than the pre-update snapshot.
# Catches both total loss (0 vs N) and partial loss (1 vs 19) — the
# desktop scheduler can overwrite jobs.json with its own small set of
# internally-tracked crons after an update/restart.
if live_count >= snap_count:
return None
try:
live_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(snap_path, live_path)
@ -1057,9 +1064,11 @@ def restore_cron_jobs_if_emptied(
logger.warning(
"Restored %d cron job(s) from pre-update snapshot %s "
"(cron/jobs.json was emptied during migration)",
"(live file had %d job(s), snapshot had %d — jobs were lost during migration)",
snap_count,
snapshot_id,
live_count,
snap_count,
)
return {"restored": True, "job_count": snap_count, "snapshot_id": snapshot_id}

View file

@ -9600,8 +9600,10 @@ def _cmd_update_impl(args, gateway_mode: bool):
# Safety net: config-version migrations have been observed to leave
# cron/jobs.json valid-but-empty, silently dropping every scheduled
# job (issue #34600). If the live file is now empty while the
# pre-update snapshot held jobs, restore it and warn loudly.
# job (issue #34600). The desktop scheduler can also overwrite with
# its own small set, causing partial loss (issue #52144). If the
# live file now has fewer jobs than the pre-update snapshot, restore
# it and warn loudly.
try:
from hermes_cli.backup import restore_cron_jobs_if_emptied
@ -9609,7 +9611,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
if cron_restore:
print()
print(
" ⚠️ cron/jobs.json was emptied during this update — "
" ⚠️ cron/jobs.json lost jobs during this update — "
f"restored {cron_restore['job_count']} job(s) from "
f"pre-update snapshot {cron_restore['snapshot_id']}."
)

View file

@ -2098,6 +2098,32 @@ class TestRestoreCronJobsIfEmptied:
result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home)
assert result is None
def test_restores_when_partial_job_loss(self, tmp_path):
"""Desktop scheduler overwrites jobs.json with its own small set,
losing tool-created crons while keeping desktop-tracked ones."""
from hermes_cli.backup import restore_cron_jobs_if_emptied
hermes_home = tmp_path / ".hermes"
jobs_path = hermes_home / "cron" / "jobs.json"
# Pre-update: 19 jobs (18 tool-created + 1 desktop watchdog).
self._seed_jobs(
jobs_path,
[{"id": f"job-{i}"} for i in range(19)],
)
snap_id = self._make_snapshot(hermes_home)
assert snap_id
# Desktop scheduler overwrites with only its own 1 job.
jobs_path.write_text(json.dumps({"jobs": [{"id": "desktop-watchdog"}]}))
result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home)
assert result is not None
assert result["restored"] is True
assert result["job_count"] == 19
# The live file now has all 19 jobs back.
restored = json.loads(jobs_path.read_text())
assert len(restored["jobs"]) == 19
def test_noop_when_snapshot_had_no_jobs(self, tmp_path):
from hermes_cli.backup import restore_cron_jobs_if_emptied
hermes_home = tmp_path / ".hermes"