mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-26 01:01:40 +00:00
fix(cron): prevent recurring job re-fire on gateway crash/restart loop (#3396)
When a gateway crashes mid-job execution (before mark_job_run can persist the updated next_run_at), the job would fire again on every restart attempt within the grace window. For a daily 6:15 AM job with a 2-hour grace, rapidly restarting the gateway could trigger dozens of duplicate runs.

Fix: call advance_next_run() BEFORE run_job() in tick(). For recurring jobs (cron/interval), this preemptively advances next_run_at to the next future occurrence and persists it to disk. If the process then crashes during execution, the job won't be considered due on restart.

One-shot jobs are left unchanged — they still retry on restart since there's no future occurrence to advance to.

This changes the scheduler from at-least-once to at-most-once semantics for recurring jobs, which is the correct tradeoff: missing one daily message is far better than sending it dozens of times.
This commit is contained in:
parent
5a1e2a307a
commit
eb2127c1dc
4 changed files with 158 additions and 1 deletions
|
|
@ -687,3 +687,41 @@ class TestBuildJobPromptMissingSkill:
|
|||
result = _build_job_prompt({"skills": ["ghost-skill", "real-skill"], "prompt": "go"})
|
||||
assert "Real skill content." in result
|
||||
assert "go" in result
|
||||
|
||||
|
||||
class TestTickAdvanceBeforeRun:
    """Ordering check for tick(): the schedule is advanced before the job runs."""

    def test_advance_called_before_run_job(self, tmp_path):
        """tick() must invoke advance_next_run ahead of run_job for a due job.

        Every scheduler collaborator used here is patched, so the test only
        observes the relative order of the two calls and tick()'s return value.
        """
        job_id = "test-advance"
        events = []  # chronological log of (kind, job id) tuples

        def record_advance(job_id):
            events.append(("advance", job_id))
            return True

        def record_run(job):
            events.append(("run", job["id"]))
            return True, "output", "response", None

        due_job = dict(
            id=job_id,
            name="test",
            prompt="hello",
            enabled=True,
            schedule={"kind": "cron", "expr": "15 6 * * *"},
        )
        output_path = tmp_path / "out.md"

        with patch("cron.scheduler.get_due_jobs", return_value=[due_job]), \
                patch("cron.scheduler.advance_next_run", side_effect=record_advance) as advance_spy, \
                patch("cron.scheduler.run_job", side_effect=record_run), \
                patch("cron.scheduler.save_job_output", return_value=output_path), \
                patch("cron.scheduler.mark_job_run"), \
                patch("cron.scheduler._deliver_result"):
            from cron.scheduler import tick

            executed = tick(verbose=False)

        assert executed == 1
        advance_spy.assert_called_once_with("test-advance")
        # The advance entry has to be logged ahead of the run entry.
        assert events == [("advance", job_id), ("run", job_id)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue