hermes-agent/tests/plugins/test_chronos_cron.py
Ben 4c8bbe6416 feat(cron): Chronos NAS-mediated managed-cron provider (scale-to-zero)
Phase 4D. The first non-default CronScheduler: plugins/cron/chronos/. Inert
unless cron.provider=chronos; resolve_cron_scheduler falls back to the built-in
if unavailable, so cron never loses its trigger.

Files:
- chronos/__init__.py — ChronosCronScheduler + register(ctx).
  * is_available(): config-only, NO network (portal_url + callback_url + a
    stored Nous access token via get_provider_auth_state). Returns False →
    resolver falls back to built-in.
  * start(): reconcile() then RETURN — no blocking loop, no 60s wake (DQ-1:
    this is what makes scale-to-zero real; the machine wakes only on a
    NAS→agent fire).
  * _arm_one_shot(job): POST NAS provision {job_id, fire_at, agent_callback_url,
    dedup_key=job_id:fire_at}. Agent owns the time → sub-minute fires survive
    (no scheduler 1-minute floor).
  * reconcile(): converge NAS arms toward jobs.json — arm missing/changed-time,
    cancel orphaned, skip paused. Cold process rebuilds from jobs.json +
    idempotent dedup_key.
  * on_jobs_changed(): reconcile (re-arm/cancel the affected one-shot).
  * fire_due(): ABC default (CAS claim + run_one_job) THEN re-arm the next
    one-shot. Job gone (one-shot done / repeat-N exhausted) → no re-arm.
- chronos/_nas_client.py — thin HTTP wrapper for provision/cancel/list using
  the agent's existing refresh-aware Nous token (resolve_nous_access_token).
  Names no scheduler vendor; holds no scheduler creds.
- chronos/plugin.yaml — discovery metadata.

INVARIANT: zero "qstash"/"upstash" hits in plugins/cron, gateway, hermes_cli,
website/docs — the external scheduler is a NAS-internal detail, never named
agent-side.

Tests (13, all NAS mocked, zero network): is_available off-without-config +
on-with-config + makes-no-network; arm payload incl. sub-minute + noop without
next_run; reconcile arms-all / cancels-orphan / skips-paused / skips-already-
armed; fire_due re-arms next / no re-arm when job gone / no re-arm when claim
lost.
2026-06-18 14:40:56 +10:00

203 lines
7.9 KiB
Python

"""Unit tests for the Chronos NAS-mediated cron provider (Phase 4D).
All NAS calls are mocked — ZERO live network. These prove:
- is_available is config-only (no network), false without config.
- one-shot arming sends the right provision payload (incl. sub-minute fires —
the agent owns the time, so there's no 1-minute floor).
- reconcile arms missing, cancels orphaned, skips paused.
- fire_due re-arms the next one-shot after a successful run, and repeat-N
(job gone) stops re-arming.
"""
import pytest
@pytest.fixture
def temp_home(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
yield tmp_path
@pytest.fixture
def chronos(monkeypatch):
"""A ChronosCronScheduler with a fake NAS client capturing calls."""
from plugins.cron.chronos import ChronosCronScheduler
class FakeClient:
def __init__(self):
self.provisions = []
self.cancels = []
self._armed = []
def provision(self, *, job_id, fire_at, agent_callback_url, dedup_key):
self.provisions.append({
"job_id": job_id, "fire_at": fire_at,
"agent_callback_url": agent_callback_url, "dedup_key": dedup_key,
})
return {"schedule_id": f"sched-{job_id}"}
def cancel(self, *, job_id):
self.cancels.append(job_id)
return {}
def list_armed(self):
return list(self._armed)
prov = ChronosCronScheduler()
fake = FakeClient()
prov._client = fake
# callback_url is read via _cfg; patch the module helper to avoid config.
monkeypatch.setattr("plugins.cron.chronos._cfg",
lambda *k, default="": "https://agent.example/" if k[-1] == "callback_url" else "https://portal.test")
return prov, fake
# -- is_available -------------------------------------------------------------
def test_is_available_false_without_config(temp_home, monkeypatch):
from plugins.cron.chronos import ChronosCronScheduler
monkeypatch.setattr("plugins.cron.chronos._cfg", lambda *k, default="": "")
assert ChronosCronScheduler().is_available() is False
def test_is_available_true_with_config_and_token(temp_home, monkeypatch):
import plugins.cron.chronos as mod
from plugins.cron.chronos import ChronosCronScheduler
monkeypatch.setattr(mod, "_cfg", lambda *k, default="": "https://x" )
monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state",
lambda pid: {"access_token": "tok"})
assert ChronosCronScheduler().is_available() is True
def test_is_available_makes_no_network(temp_home, monkeypatch):
"""is_available must not construct the NAS client / hit network."""
import plugins.cron.chronos as mod
from plugins.cron.chronos import ChronosCronScheduler
monkeypatch.setattr(mod, "_cfg", lambda *k, default="": "https://x")
monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state",
lambda pid: {"access_token": "tok"})
p = ChronosCronScheduler()
def explode():
raise AssertionError("is_available must not build the NAS client")
monkeypatch.setattr(p, "_get_client", explode)
assert p.is_available() is True # did not call _get_client
# -- arming -------------------------------------------------------------------
def test_arm_one_shot_sends_provision(chronos):
prov, fake = chronos
prov._arm_one_shot({"id": "j1", "next_run_at": "2026-06-18T12:00:00+00:00"})
assert len(fake.provisions) == 1
p = fake.provisions[0]
assert p["job_id"] == "j1"
assert p["fire_at"] == "2026-06-18T12:00:00+00:00"
assert p["dedup_key"] == "j1:2026-06-18T12:00:00+00:00"
assert p["agent_callback_url"] == "https://agent.example/"
def test_arm_one_shot_preserves_sub_minute_fire(chronos):
"""Sub-minute fire times survive — the agent owns the time, so there's no
1-minute scheduler floor."""
prov, fake = chronos
prov._arm_one_shot({"id": "j2", "next_run_at": "2026-06-18T12:00:30+00:00"})
assert fake.provisions[0]["fire_at"] == "2026-06-18T12:00:30+00:00"
def test_arm_one_shot_noop_without_next_run(chronos):
prov, fake = chronos
prov._arm_one_shot({"id": "j3", "next_run_at": None})
assert fake.provisions == []
# -- reconcile ----------------------------------------------------------------
def test_reconcile_arms_all_enabled(temp_home, chronos, monkeypatch):
prov, fake = chronos
jobs = [
{"id": "a", "enabled": True, "next_run_at": "2026-06-18T12:00:00+00:00", "state": "scheduled"},
{"id": "b", "enabled": True, "next_run_at": "2026-06-18T12:05:00+00:00", "state": "scheduled"},
]
monkeypatch.setattr("cron.jobs.load_jobs", lambda: jobs)
monkeypatch.setattr("cron.jobs.get_job", lambda jid: next(j for j in jobs if j["id"] == jid))
prov.reconcile()
assert {p["job_id"] for p in fake.provisions} == {"a", "b"}
assert fake.cancels == []
def test_reconcile_cancels_orphan_arms_desired(temp_home, chronos, monkeypatch):
prov, fake = chronos
# NAS already has a stale arm for deleted job "gone".
prov._armed = {"gone": "2026-06-18T11:00:00+00:00"}
jobs = [{"id": "a", "enabled": True, "next_run_at": "2026-06-18T12:00:00+00:00", "state": "scheduled"}]
monkeypatch.setattr("cron.jobs.load_jobs", lambda: jobs)
monkeypatch.setattr("cron.jobs.get_job", lambda jid: next((j for j in jobs if j["id"] == jid), None))
prov.reconcile()
assert [p["job_id"] for p in fake.provisions] == ["a"]
assert fake.cancels == ["gone"]
def test_reconcile_skips_paused(temp_home, chronos, monkeypatch):
prov, fake = chronos
jobs = [{"id": "p", "enabled": True, "next_run_at": "2026-06-18T12:00:00+00:00", "state": "paused"}]
monkeypatch.setattr("cron.jobs.load_jobs", lambda: jobs)
monkeypatch.setattr("cron.jobs.get_job", lambda jid: next((j for j in jobs if j["id"] == jid), None))
prov.reconcile()
assert fake.provisions == []
def test_reconcile_skips_already_armed_same_time(temp_home, chronos, monkeypatch):
prov, fake = chronos
prov._armed = {"a": "2026-06-18T12:00:00+00:00"}
jobs = [{"id": "a", "enabled": True, "next_run_at": "2026-06-18T12:00:00+00:00", "state": "scheduled"}]
monkeypatch.setattr("cron.jobs.load_jobs", lambda: jobs)
monkeypatch.setattr("cron.jobs.get_job", lambda jid: jobs[0])
prov.reconcile()
assert fake.provisions == [] # already armed at the same time → no re-arm
# -- fire_due re-arm ----------------------------------------------------------
def test_fire_due_rearms_next_oneshot(chronos, monkeypatch):
prov, fake = chronos
# super().fire_due runs the job; stub the ABC default to "ran".
monkeypatch.setattr("cron.scheduler_provider.CronScheduler.fire_due",
lambda self, jid, **kw: True)
monkeypatch.setattr("cron.jobs.get_job",
lambda jid: {"id": jid, "enabled": True, "next_run_at": "2026-06-18T12:05:00+00:00"})
assert prov.fire_due("j1") is True
assert [p["job_id"] for p in fake.provisions] == ["j1"]
assert fake.provisions[0]["fire_at"] == "2026-06-18T12:05:00+00:00"
def test_fire_due_no_rearm_when_job_gone(chronos, monkeypatch):
"""repeat-N exhausted / one-shot completed → mark_job_run deleted the job →
get_job None → no re-arm (the schedule stops cleanly)."""
prov, fake = chronos
monkeypatch.setattr("cron.scheduler_provider.CronScheduler.fire_due",
lambda self, jid, **kw: True)
monkeypatch.setattr("cron.jobs.get_job", lambda jid: None)
assert prov.fire_due("j1") is True
assert fake.provisions == []
def test_fire_due_no_rearm_when_claim_lost(chronos, monkeypatch):
"""If the run didn't happen (claim lost), don't re-arm."""
prov, fake = chronos
monkeypatch.setattr("cron.scheduler_provider.CronScheduler.fire_due",
lambda self, jid, **kw: False)
assert prov.fire_due("j1") is False
assert fake.provisions == []