mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
fix(cron): detect partial job loss in restore_cron_jobs_if_emptied (#52144)
The desktop scheduler can overwrite cron/jobs.json with its own small set of internally-tracked crons after an update/restart, causing partial loss of tool-created cron jobs. The previous guard only checked for total loss (live_count == 0), missing the case where live_count > 0 but less than the pre-update snapshot count. Compare live_count against snap_count instead of checking for zero, so both total loss (0 vs N) and partial loss (1 vs 19) trigger restoration. Salvaged from #52161 by @liuhao1024. Closes #52144
This commit is contained in:
parent
6b639bc2b9
commit
56cf517ccd
3 changed files with 52 additions and 15 deletions
|
|
@ -1005,17 +1005,18 @@ def restore_cron_jobs_if_emptied(
|
|||
|
||||
Config-version migrations have been observed to leave ``cron/jobs.json``
|
||||
valid-but-empty after an update, silently dropping every scheduled job
|
||||
(issue #34600). The existing malformed-shape guards in ``cron/jobs.py``
|
||||
don't catch this case because ``{"jobs": []}`` is perfectly valid JSON.
|
||||
(issue #34600). The desktop scheduler can also overwrite the file with its
|
||||
own small set of internally-tracked crons, causing partial loss (issue
|
||||
#52144).
|
||||
|
||||
This compares the *current* job count against the pre-update snapshot. If
|
||||
the live file now has **zero** jobs while the snapshot captured **one or
|
||||
more**, the snapshot copy of ``cron/jobs.json`` is restored in place.
|
||||
the live file now has **fewer** jobs than the snapshot, the snapshot copy
|
||||
of ``cron/jobs.json`` is restored in place.
|
||||
|
||||
The check is deliberately conservative — it only ever restores when there
|
||||
is unambiguous evidence of loss (snapshot had jobs, live file has none),
|
||||
so a user who genuinely deleted all their jobs during/after the update is
|
||||
never second-guessed, and an unreadable live file (count ``None``) is left
|
||||
is unambiguous evidence of loss (snapshot had more jobs than live file),
|
||||
so a user who genuinely deleted jobs during/after the update is never
|
||||
second-guessed, and an unreadable live file (count ``None``) is left
|
||||
untouched so real corruption still surfaces.
|
||||
|
||||
Args:
|
||||
|
|
@ -1035,10 +1036,9 @@ def restore_cron_jobs_if_emptied(
|
|||
live_path = home / _CRON_JOBS_REL
|
||||
|
||||
live_count = _count_cron_jobs(live_path)
|
||||
# Only act when the live file is readable AND empty. ``None`` (missing or
|
||||
# unparseable) is intentionally left alone — that's a different failure
|
||||
# mode the user should see rather than have papered over.
|
||||
if live_count is None or live_count > 0:
|
||||
# ``None`` (missing or unparseable) is intentionally left alone — that's a
|
||||
# different failure mode the user should see rather than have papered over.
|
||||
if live_count is None:
|
||||
return None
|
||||
|
||||
snap_path = _quick_snapshot_root(home) / snapshot_id / _CRON_JOBS_REL
|
||||
|
|
@ -1046,6 +1046,13 @@ def restore_cron_jobs_if_emptied(
|
|||
if not snap_count: # None or 0 — nothing worth restoring
|
||||
return None
|
||||
|
||||
# Restore when live has FEWER jobs than the pre-update snapshot.
|
||||
# Catches both total loss (0 vs N) and partial loss (1 vs 19) — the
|
||||
# desktop scheduler can overwrite jobs.json with its own small set of
|
||||
# internally-tracked crons after an update/restart.
|
||||
if live_count >= snap_count:
|
||||
return None
|
||||
|
||||
try:
|
||||
live_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(snap_path, live_path)
|
||||
|
|
@ -1057,9 +1064,11 @@ def restore_cron_jobs_if_emptied(
|
|||
|
||||
logger.warning(
|
||||
"Restored %d cron job(s) from pre-update snapshot %s "
|
||||
"(cron/jobs.json was emptied during migration)",
|
||||
"(live file had %d job(s), snapshot had %d — jobs were lost during migration)",
|
||||
snap_count,
|
||||
snapshot_id,
|
||||
live_count,
|
||||
snap_count,
|
||||
)
|
||||
return {"restored": True, "job_count": snap_count, "snapshot_id": snapshot_id}
|
||||
|
||||
|
|
|
|||
|
|
@ -9600,8 +9600,10 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
|||
|
||||
# Safety net: config-version migrations have been observed to leave
|
||||
# cron/jobs.json valid-but-empty, silently dropping every scheduled
|
||||
# job (issue #34600). If the live file is now empty while the
|
||||
# pre-update snapshot held jobs, restore it and warn loudly.
|
||||
# job (issue #34600). The desktop scheduler can also overwrite with
|
||||
# its own small set, causing partial loss (issue #52144). If the
|
||||
# live file now has fewer jobs than the pre-update snapshot, restore
|
||||
# it and warn loudly.
|
||||
try:
|
||||
from hermes_cli.backup import restore_cron_jobs_if_emptied
|
||||
|
||||
|
|
@ -9609,7 +9611,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
|||
if cron_restore:
|
||||
print()
|
||||
print(
|
||||
" ⚠️ cron/jobs.json was emptied during this update — "
|
||||
" ⚠️ cron/jobs.json lost jobs during this update — "
|
||||
f"restored {cron_restore['job_count']} job(s) from "
|
||||
f"pre-update snapshot {cron_restore['snapshot_id']}."
|
||||
)
|
||||
|
|
|
|||
|
|
@ -2098,6 +2098,32 @@ class TestRestoreCronJobsIfEmptied:
|
|||
result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home)
|
||||
assert result is None
|
||||
|
||||
def test_restores_when_partial_job_loss(self, tmp_path):
|
||||
"""Desktop scheduler overwrites jobs.json with its own small set,
|
||||
losing tool-created crons while keeping desktop-tracked ones."""
|
||||
from hermes_cli.backup import restore_cron_jobs_if_emptied
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
jobs_path = hermes_home / "cron" / "jobs.json"
|
||||
# Pre-update: 19 jobs (18 tool-created + 1 desktop watchdog).
|
||||
self._seed_jobs(
|
||||
jobs_path,
|
||||
[{"id": f"job-{i}"} for i in range(19)],
|
||||
)
|
||||
snap_id = self._make_snapshot(hermes_home)
|
||||
assert snap_id
|
||||
|
||||
# Desktop scheduler overwrites with only its own 1 job.
|
||||
jobs_path.write_text(json.dumps({"jobs": [{"id": "desktop-watchdog"}]}))
|
||||
|
||||
result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home)
|
||||
assert result is not None
|
||||
assert result["restored"] is True
|
||||
assert result["job_count"] == 19
|
||||
|
||||
# The live file now has all 19 jobs back.
|
||||
restored = json.loads(jobs_path.read_text())
|
||||
assert len(restored["jobs"]) == 19
|
||||
|
||||
def test_noop_when_snapshot_had_no_jobs(self, tmp_path):
|
||||
from hermes_cli.backup import restore_cron_jobs_if_emptied
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue