hermes-agent/tests/tools/test_cronjob_run_immediate.py
kyssta-exe 65d7c7fafd fix(cron): execute job immediately on action='run'
`cronjob(action='run')` (and `hermes cron run`) only set `next_run_at = now`
and returned success, relying on the scheduler ticker to actually execute the
job on its next tick. When no gateway/ticker is running — a CLI-only setup, or
the Windows case in #41037 — the job never executed: `run` reported success,
but `last_run_at` stayed null forever, no output, no delivery.

A manual `run` should actually run. `_execute_job_now` now:

- **claims the job via `claim_job_for_fire`** — the same at-most-once CAS the
  scheduler/external-provider fire path uses. This both advances `next_run_at`
  for recurring jobs and blocks a concurrently-running gateway ticker from
  double-firing the same job; if the claim is lost, the run is skipped (the
  tool reports `execution_skipped`). This closes the double-fire race that a
  bare `advance_next_run` left open (a tick whose `get_due_jobs` already
  captured the job between trigger and advance would still fire it).
- **delegates firing to `run_one_job`** — the single shared
  execute→save→deliver→mark body the ticker and external providers use — so
  failure delivery, `[SILENT]` handling, and live-adapter delivery stay
  identical across paths and can't drift. (The original salvage re-implemented
  this sequence inline and had already dropped failure delivery + `[SILENT]`.)

The tool response carries `executed`, `execution_success`, and either
`execution_error` or `execution_skipped`. The `hermes cron run` CLI message no
longer claims "It will run on the next scheduler tick" — it reports the actual
"Ran now: succeeded/failed" outcome (or the skip).

Salvaged from #41130 by @kyssta-exe (authorship preserved); reworked to reuse
`claim_job_for_fire` + `run_one_job` per review rather than re-implementing the
fire sequence inline. Adds tests for the claim-then-fire path, claim-lost skip,
failure reporting, and exception capture.

Fixes #41037

Co-authored-by: kyssta-exe <kyssta-exe@users.noreply.github.com>
2026-06-21 13:28:04 +05:30

81 lines
4.2 KiB
Python

"""Tests for cronjob action='run' immediate execution (#41037).
Before this fix, `cronjob(action='run')` only set next_run_at=now and returned
success, relying on the scheduler ticker to actually run the job. With no
gateway/ticker active (e.g. a CLI-only Windows setup) the job never executed and
last_run_at stayed null forever. Now action='run' claims the job (at-most-once,
blocking a concurrent tick) and fires it inline via the shared run_one_job body.
"""
import json
from unittest.mock import patch
from tools.cronjob_tools import cronjob, _execute_job_now
_JOB = {"id": "job-run-1", "name": "manual run", "prompt": "hi",
"schedule": {"kind": "cron", "expr": "0 9 * * *"}}
class TestCronjobRunExecutesImmediately:
def test_run_action_claims_and_fires_via_run_one_job(self):
"""action='run' must claim the job then fire it through run_one_job."""
ran = {"job": "after-run", "last_status": "ok", "last_error": None}
with patch("tools.cronjob_tools.resolve_job_ref", return_value=dict(_JOB)), \
patch("tools.cronjob_tools.claim_job_for_fire", return_value=True) as m_claim, \
patch("cron.scheduler.run_one_job", return_value=True) as m_run, \
patch("tools.cronjob_tools.get_job", return_value=ran):
out = json.loads(cronjob(action="run", job_id="job-run-1"))
assert out["success"] is True
assert out["job"]["executed"] is True
assert out["job"]["execution_success"] is True
m_claim.assert_called_once_with("job-run-1") # at-most-once claim taken
m_run.assert_called_once() # fired via the shared body
def test_run_skips_when_claim_lost(self):
"""If the scheduler already holds the fire claim, do NOT double-run."""
with patch("tools.cronjob_tools.resolve_job_ref", return_value=dict(_JOB)), \
patch("tools.cronjob_tools.claim_job_for_fire", return_value=False), \
patch("cron.scheduler.run_one_job") as m_run, \
patch("tools.cronjob_tools.get_job", return_value=dict(_JOB)):
out = json.loads(cronjob(action="run", job_id="job-run-1"))
assert out["success"] is True
assert out["job"]["executed"] is False
assert out["job"]["execution_success"] is False
assert "execution_skipped" in out["job"]
m_run.assert_not_called() # claim lost -> never fired
def test_run_reports_failure_from_last_status(self):
"""A failed run is reported via the re-read job's last_status/last_error."""
failed = {"id": "job-run-1", "last_status": "error", "last_error": "provider 500"}
with patch("tools.cronjob_tools.resolve_job_ref", return_value=dict(_JOB)), \
patch("tools.cronjob_tools.claim_job_for_fire", return_value=True), \
patch("cron.scheduler.run_one_job", return_value=True), \
patch("tools.cronjob_tools.get_job", return_value=failed):
out = json.loads(cronjob(action="run", job_id="job-run-1"))
assert out["job"]["executed"] is True
assert out["job"]["execution_success"] is False
assert out["job"]["execution_error"] == "provider 500"
def test_execute_job_now_bails_without_claim(self):
"""_execute_job_now never calls run_one_job when the claim is lost."""
with patch("tools.cronjob_tools.claim_job_for_fire", return_value=False), \
patch("cron.scheduler.run_one_job") as m_run:
res = _execute_job_now(dict(_JOB))
assert res["claimed"] is False
assert res["success"] is False
m_run.assert_not_called()
def test_execute_job_now_marks_failure_on_exception(self):
"""An exception during fire is captured, marked failed, not propagated."""
with patch("tools.cronjob_tools.claim_job_for_fire", return_value=True), \
patch("cron.scheduler.run_one_job", side_effect=RuntimeError("boom")), \
patch("tools.cronjob_tools.mark_job_run") as m_mark, \
patch("tools.cronjob_tools.get_job", return_value=dict(_JOB)):
res = _execute_job_now(dict(_JOB))
assert res["claimed"] is True
assert res["success"] is False
assert "boom" in res["error"]
m_mark.assert_called_once()