diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py index 0f3b5a272b3..1f806050ad9 100644 --- a/hermes_cli/cron.py +++ b/hermes_cli/cron.py @@ -57,6 +57,21 @@ def _cron_api(**kwargs): return json.loads(cronjob_tool(**kwargs)) +def _active_cron_provider_name() -> str: + """Name of the resolved cron scheduler provider ('builtin', 'chronos', …). + + Best-effort + offline (``resolve_cron_scheduler`` reads config and the + provider's ``is_available()`` contract forbids network). Returns 'builtin' + on any failure so callers fall back to the historical ticker-based checks. + """ + try: + from cron.scheduler_provider import resolve_cron_scheduler + + return resolve_cron_scheduler().name or "builtin" + except Exception: + return "builtin" + + def _warn_if_gateway_not_running() -> None: """Warn that scheduled jobs won't fire unless the gateway is running. @@ -65,8 +80,17 @@ def _warn_if_gateway_not_running() -> None: gateway, ``next_run_at`` passes but jobs never fire and ``last_run_at`` stays null — the most common cron support report (#51038). Surfacing this at create/list time, when the user is right there, prevents it. + + An external provider (e.g. Chronos) fires jobs via a NAS-mediated webhook, + NOT the in-process ticker, so a momentarily-absent gateway process does not + mean jobs won't fire — the warning would be a false alarm. Stay quiet for + any non-builtin provider; the gateway-process heuristic only speaks to the + built-in ticker's trigger. """ try: + if _active_cron_provider_name() != "builtin": + return + from hermes_cli.gateway import find_gateway_pids if find_gateway_pids(): @@ -177,6 +201,30 @@ def cron_status(): print() + provider = _active_cron_provider_name() + if provider != "builtin": + # An external provider (e.g. Chronos) does NOT run the in-process 60s + # ticker — it arms one external one-shot per job and is fired by a + # NAS-mediated webhook, so between fires there is intentionally NO + # ticker thread and NO heartbeat file. Reporting the ticker-heartbeat + # staleness here would always say "stalled / not firing" on a perfectly + # healthy Chronos instance. Report the provider instead and skip the + # ticker-liveness heuristics entirely. + print(color( + f"✓ Cron provider: {provider} — jobs fire via the managed scheduler, " + "not the in-process ticker.", + Colors.GREEN, + )) + print(color( + " (No ticker heartbeat is expected for an external provider; " + "due jobs are delivered by an authenticated webhook.)", + Colors.DIM, + )) + print() + _print_active_jobs_summary(list_jobs(include_disabled=False)) + print() + return + pids = find_gateway_pids() if pids: # The gateway PROCESS is alive — but the cron ticker THREAD inside it @@ -231,7 +279,14 @@ def cron_status(): print() - jobs = list_jobs(include_disabled=False) + _print_active_jobs_summary(list_jobs(include_disabled=False)) + + print() + + +def _print_active_jobs_summary(jobs) -> None: + """Print the ' active job(s)' + next-run line shared by every status + path (built-in ticker AND external provider).""" if jobs: next_runs = [j.get("next_run_at") for j in jobs if j.get("next_run_at")] print(f" {len(jobs)} active job(s)") @@ -240,8 +295,6 @@ def cron_status(): else: print(" No active jobs") - print() - def cron_create(args): # Defense: reject cron jobs that contain gateway lifecycle commands. diff --git a/tests/hermes_cli/test_cron.py b/tests/hermes_cli/test_cron.py index 1281589048b..8cd7ef39659 100644 --- a/tests/hermes_cli/test_cron.py +++ b/tests/hermes_cli/test_cron.py @@ -178,3 +178,73 @@ class TestGatewayNotRunningWarning: cron_command(Namespace(cron_command="list", all=True)) out = capsys.readouterr().out assert "Gateway is not running" in out + + +class TestExternalCronProviderStatus: + """With an external cron provider (e.g. Chronos), jobs fire via a + NAS-mediated webhook, NOT the in-process ticker. The ticker-heartbeat / + gateway-process heuristics are meaningless there, so neither + `cron status` nor the create/list warning must claim the gateway being + absent means jobs won't fire — that was a false-negative on every healthy + Chronos instance (the heartbeat is intentionally never written). + """ + + def test_status_reports_provider_not_ticker_for_chronos( + self, tmp_cron_dir, capsys, monkeypatch + ): + create_job(prompt="Ping", schedule="every 2m") + monkeypatch.setattr( + "hermes_cli.cron._active_cron_provider_name", lambda: "chronos" + ) + # Even with NO gateway process and NO ticker heartbeat, Chronos status + # must NOT report a stall / "not firing". + monkeypatch.setattr("hermes_cli.gateway.find_gateway_pids", lambda: []) + cron_command(Namespace(cron_command="status")) + out = capsys.readouterr().out + assert "chronos" in out + assert "managed scheduler" in out + assert "not firing" not in out.lower() + assert "STALLED" not in out + assert "Gateway is not running" not in out + # Still surfaces the active-job summary. + assert "active job(s)" in out + + def test_status_unchanged_for_builtin(self, tmp_cron_dir, capsys, monkeypatch): + create_job(prompt="Ping", schedule="every 2m") + monkeypatch.setattr( + "hermes_cli.cron._active_cron_provider_name", lambda: "builtin" + ) + monkeypatch.setattr("hermes_cli.gateway.find_gateway_pids", lambda: []) + cron_command(Namespace(cron_command="status")) + out = capsys.readouterr().out + # Built-in path is the historical ticker-based report. + assert "Gateway is not running" in out + assert "managed scheduler" not in out + + def test_create_silent_for_chronos_even_without_gateway( + self, tmp_cron_dir, capsys, monkeypatch + ): + # The create-time "gateway not running" nag is a ticker-only concern; + # an external provider doesn't depend on a live in-process ticker. + monkeypatch.setattr( + "hermes_cli.cron._active_cron_provider_name", lambda: "chronos" + ) + monkeypatch.setattr("hermes_cli.gateway.find_gateway_pids", lambda: []) + cron_command( + Namespace( + cron_command="create", + schedule="every 2m", + prompt="Ping", + name="Ping", + deliver=None, + repeat=None, + skill=None, + skills=None, + script=None, + workdir=None, + no_agent=False, + ) + ) + out = capsys.readouterr().out + assert "Created job" in out + assert "Gateway is not running" not in out