diff --git a/cron/jobs.py b/cron/jobs.py
index 6ec6d5be123..fd7bbd8a8a2 100644
--- a/cron/jobs.py
+++ b/cron/jobs.py
@@ -755,6 +755,45 @@ def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
return str(resolved)
+def _resolve_default_model_snapshot() -> Optional[str]:
+ """Resolve the global default model the same way the cron ticker does.
+
+ Mirrors the unpinned-model resolution in ``cron/scheduler.py`` ``run_job``:
+ read ``config.yaml`` ``model.default`` (or the ``model`` alias / bare string
+ form), applying the managed-scope overlay and env expansion. Used by
+ ``create_job`` to snapshot the default model for unpinned jobs so a later
+ swap of the global default is detected at fire time (#44585).
+
+ Returns the whitespace-stripped model string, or ``None`` if config is
+ missing/empty or resolution fails (fail-open — ``None`` means "no snapshot").
+ """
+ try:
+ import yaml
+ from hermes_cli.config import _expand_env_vars
+
+ cfg_path = get_hermes_home() / "config.yaml"
+ if not cfg_path.exists():
+ return None
+ with cfg_path.open(encoding="utf-8") as f:
+ cfg = yaml.safe_load(f) or {}  # falsy parse result (e.g. empty file) becomes {}
+ try:
+ from hermes_cli import managed_scope
+ cfg = managed_scope.apply_managed_overlay(cfg)
+ except Exception:  # overlay is best-effort: keep the un-overlaid config
+ pass
+ cfg = _expand_env_vars(cfg)
+ model_cfg = cfg.get("model") or {}
+ if isinstance(model_cfg, str):  # bare-string form: the "model" value is the name itself
+ return model_cfg.strip() or None
+ if isinstance(model_cfg, dict):
+ default = model_cfg.get("default") or model_cfg.get("model")  # "default" wins over the alias
+ if isinstance(default, str):
+ return default.strip() or None
+ return None  # no usable string model found in config
+ except Exception:  # fail-open: import/read/parse errors all mean "no snapshot"
+ return None
+
+
def create_job(
prompt: Optional[str],
schedule: str,
@@ -870,6 +909,47 @@ def create_job(
prompt_text = _coerce_job_text(prompt)
label_source = (prompt_text or (normalized_skills[0] if normalized_skills else None) or (normalized_script if normalized_no_agent else None)) or "cron job"
+
+ # Provider/model-drift guard (#44585). When the caller does NOT pin a
+ # provider and/or model, the job follows the global default — model.default
+ # in config.yaml and whatever resolve_runtime_provider() picks at fire time.
+ # That global state can change (e.g. a temporary switch to a paid provider
+ # OR a paid model like claude-fable-5 on the SAME provider), and an unpinned
+ # job would then silently inherit it and spend real money. To detect that,
+ # snapshot what resolution WOULD pick *right now*, at creation, for each
+ # axis the job leaves unpinned. The fire-time guard (run_job) fails closed
+ # when an unpinned job's currently-resolved provider OR model differs from
+ # its snapshot.
+ #
+ # Only captured for agent-backed jobs (no_agent script jobs make no paid
+ # inference). Each axis is snapshotted only when that axis is unpinned —
+ # a pinned provider/model doesn't drift with global state. Fail-open to None
+ # on any resolution error so job creation never breaks; a missing snapshot
+ # preserves the legacy no-guard behaviour for that axis.
+ provider_snapshot: Optional[str] = None
+ model_snapshot: Optional[str] = None
+ if not normalized_no_agent:
+ if normalized_provider is None:
+ try:
+ from hermes_cli.runtime_provider import resolve_runtime_provider
+ _runtime_kwargs = {"requested": None}
+ if normalized_base_url:
+ _runtime_kwargs["explicit_base_url"] = normalized_base_url
+ _snap = resolve_runtime_provider(**_runtime_kwargs)
+ _snap_provider = str(_snap.get("provider") or "").strip().lower()
+ provider_snapshot = _snap_provider or None
+ except Exception:
+ provider_snapshot = None
+ if normalized_model is None:
+ # Mirror the fire-time unpinned-model resolution (run_job reads
+ # config.yaml model.default / model). Capture that value so a later
+ # swap of the global default model is detected even when the
+ # provider is unchanged (e.g. a premium model on the same endpoint).
+ try:
+ model_snapshot = _resolve_default_model_snapshot() or None
+ except Exception:
+ model_snapshot = None
+
job = {
"id": job_id,
"name": name or label_source[:50].strip(),
@@ -878,6 +958,11 @@ def create_job(
"skill": normalized_skills[0] if normalized_skills else None,
"model": normalized_model,
"provider": normalized_provider,
+ # Provider/model resolution captured at creation for unpinned jobs
+ # (#44585). None for pinned axes, no_agent jobs, resolution failures, and
+ # any pre-existing job written before these fields existed (back-compat).
+ "provider_snapshot": provider_snapshot,
+ "model_snapshot": model_snapshot,
"base_url": normalized_base_url,
"script": normalized_script,
"no_agent": normalized_no_agent,
diff --git a/cron/scheduler.py b/cron/scheduler.py
index c48935c84a6..b3c3f95c5b7 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -2016,6 +2016,60 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
message = format_runtime_provider_error(exc)
raise RuntimeError(message) from exc
+ # Provider/model-drift fail-closed guard (#44585).
+ #
+ # An UNPINNED job (no explicit job["provider"]/["model"]) follows the
+ # global default, which can change after the job was created — a switch
+ # to a paid PROVIDER (e.g. nous) OR a paid MODEL on the same provider
+ # (e.g. claude-fable-5 on openrouter). Without a guard the job would
+ # silently inherit that change and spend real money on every tick — the
+ # $7.73 incident named BOTH a provider and a model.
+ #
+ # create_job() snapshots whatever resolution would have picked at
+ # creation for each unpinned axis (job["provider_snapshot"] /
+ # job["model_snapshot"]). Here, for each axis that (a) has a snapshot and
+ # (b) is unpinned and (c) currently resolves to a DIFFERENT value, we
+ # fail closed: skip this run, make NO paid call, and deliver a loud,
+ # actionable alert telling the user to pin the axis explicitly.
+ #
+ # Back-compat: an axis with no snapshot (pre-existing jobs, no_agent, or
+ # any axis whose creation-time resolution failed) behaves exactly as
+ # before — the guard never engages for it. Pinned axes are unaffected.
+ _drift: list[str] = []
+ _provider_snapshot = (job.get("provider_snapshot") or "").strip().lower()
+ if _provider_snapshot and not (job.get("provider") or "").strip():
+ _current_provider = str(runtime.get("provider") or "").strip().lower()
+ if _current_provider and _current_provider != _provider_snapshot:
+ _drift.append(
+ f"provider '{_provider_snapshot}' -> '{_current_provider}'"
+ )
+ _model_snapshot = (job.get("model_snapshot") or "").strip().lower()
+ if _model_snapshot and not (job.get("model") or "").strip():
+ _current_model = str(model or "").strip().lower()
+ if _current_model and _current_model != _model_snapshot:
+ _drift.append(
+ f"model '{_model_snapshot}' -> '{_current_model}'"
+ )
+ if _drift:
+ _changes = "; ".join(_drift)
+ logger.warning(
+ "Job '%s': SKIPPED — global inference config drifted since "
+ "creation (%s) and this job is unpinned. Skipped to prevent "
+ "unintended spend. Pin explicitly to proceed: "
+ "`cronjob action=update job_id=%s provider=… "
+ "model=…`.",
+ job_id,
+ _changes,
+ job_id,
+ )
+ raise RuntimeError(
+ f"Skipped to prevent unintended spend: global inference config "
+ f"drifted since this job was created ({_changes}), and this job "
+ f"is unpinned. No inference call was made. To run on the new "
+ f"config, pin it explicitly: `cronjob action=update "
+ f"job_id={job_id} provider=… model=…` "
+ f"(or pin the original values to keep them). See #44585."
+ )
+
fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None
credential_pool = None
runtime_provider = str(runtime.get("provider") or "").strip().lower()
diff --git a/tests/cron/test_cron_provider_pin.py b/tests/cron/test_cron_provider_pin.py
new file mode 100644
index 00000000000..e5d06cc212d
--- /dev/null
+++ b/tests/cron/test_cron_provider_pin.py
@@ -0,0 +1,334 @@
+"""Provider/model-drift fail-closed guard for cron jobs (#44585).
+
+Background: an UNPINNED cron job follows the global default provider and
+model. If that global state is changed (e.g. a temporary switch to a paid
+provider/model such as nous/claude-fable-5), the job would silently inherit
+it on its next tick and spend real money — the $7.73 incident.
+
+The fix has two halves:
+ - create_job() snapshots what resolution WOULD pick at creation into
+ job["provider_snapshot"] / job["model_snapshot"] (unpinned, agent-backed jobs).
+ - run_job() fails closed when an unpinned axis's CURRENTLY-resolved value
+ differs from its snapshot: it skips the run, makes no paid call, and
+ delivers a loud actionable error.
+
+These tests exercise the full run_job path (real imports, mocked AIAgent +
+resolve_runtime_provider against a temp HERMES_HOME) and the create_job
+snapshot capture. They are load-bearing: without the guard, the drift cases
+call the agent and "succeed" instead of failing closed.
+"""
+
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+# Ensure project root is importable.
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from cron.scheduler import run_job
+
+
+def _base_job(**overrides):  # minimal unpinned job dict; keyword overrides win
+ job = {
+ "id": "pin-test",
+ "name": "pin test",
+ "prompt": "hello",
+ "model": None,  # unpinned model axis
+ "provider": None,  # unpinned provider axis
+ "provider_snapshot": None,  # no snapshot unless a test supplies one
+ "base_url": None,
+ }
+ job.update(overrides)  # may also add keys absent above, e.g. model_snapshot
+ return job
+
+
+def _run_with_current_provider(job, current_provider, tmp_path):
+ """Run ``job`` through run_job with resolution patched to ``current_provider``.
+
+ Returns (success, output, final_response, error, agent_constructed).
+ """
+ fake_db = MagicMock()
+ with patch("cron.scheduler._hermes_home", tmp_path), \
+ patch("cron.scheduler._resolve_origin", return_value=None), \
+ patch("dotenv.load_dotenv"), \
+ patch("hermes_state.SessionDB", return_value=fake_db), \
+ patch(
+ "hermes_cli.runtime_provider.resolve_runtime_provider",
+ return_value={
+ "api_key": "test-key",
+ "base_url": "https://example.invalid/v1",
+ "provider": current_provider,  # the "currently resolved" provider under test
+ "api_mode": "chat_completions",
+ },
+ ), \
+ patch("run_agent.AIAgent") as mock_agent_cls:  # stand-in class: no real agent runs
+ mock_agent = MagicMock()
+ mock_agent.run_conversation.return_value = {"final_response": "ok"}
+ mock_agent_cls.return_value = mock_agent
+
+ success, output, final_response, error = run_job(job)
+ agent_constructed = mock_agent_cls.called  # True iff the AIAgent mock was called
+
+ return success, output, final_response, error, agent_constructed
+
+
+class TestProviderDriftGuard:  # run_job provider-axis guard, scenarios (a)-(d)
+ def test_a_unpinned_snapshot_matches_runs_normally(self, tmp_path):
+ """(a) Unpinned job whose snapshot == current provider → runs normally."""
+ job = _base_job(provider_snapshot="openrouter")
+ success, output, final_response, error, agent_constructed = \
+ _run_with_current_provider(job, "openrouter", tmp_path)
+
+ assert success is True
+ assert error is None
+ assert final_response == "ok"
+ assert agent_constructed is True
+
+ def test_b_unpinned_snapshot_differs_fails_closed(self, tmp_path):
+ """(b) Unpinned job whose snapshot != current provider → fail closed.
+
+ The paid call must NOT be made (AIAgent never constructed) and the
+ delivered error must name both providers and tell the user to pin.
+ """
+ job = _base_job(provider_snapshot="openrouter")
+ success, output, final_response, error, agent_constructed = \
+ _run_with_current_provider(job, "nous", tmp_path)
+
+ # Fail closed: no agent constructed, no inference call.
+ assert agent_constructed is False
+ assert success is False
+ assert error is not None
+
+ # Loud + actionable: names both providers, mentions spend + pinning.
+ blob = f"{error}\n{output}".lower()  # search error and output together, case-insensitively
+ assert "openrouter" in blob
+ assert "nous" in blob
+ assert "spend" in blob
+ assert "cronjob action=update" in blob
+ assert "44585" in blob
+
+ def test_c_no_snapshot_runs_backcompat(self, tmp_path):
+ """(c) Pre-existing job with NO provider_snapshot → runs (back-compat).
+
+ Whatever provider currently resolves, a job without a snapshot must
+ behave exactly as before this fix: the guard never engages.
+ """
+ # A job dict that predates the field entirely (key absent, not None).
+ job = _base_job()
+ job.pop("provider_snapshot", None)
+ success, output, final_response, error, agent_constructed = \
+ _run_with_current_provider(job, "nous", tmp_path)
+
+ assert success is True
+ assert error is None
+ assert agent_constructed is True
+
+ def test_c2_snapshot_none_runs_backcompat(self, tmp_path):
+ """(c') Job with provider_snapshot explicitly None → runs (back-compat)."""
+ job = _base_job(provider_snapshot=None)
+ success, output, final_response, error, agent_constructed = \
+ _run_with_current_provider(job, "nous", tmp_path)
+
+ assert success is True
+ assert error is None
+ assert agent_constructed is True
+
+ def test_d_explicitly_pinned_runs_regardless_of_drift(self, tmp_path):
+ """(d) Explicitly-pinned job (job["provider"] set) → runs regardless.
+
+ A pinned job does not follow global state, so even a snapshot/current
+ mismatch must not skip it. (Snapshot would normally be None for pinned
+ jobs, but we set a mismatching one to prove the pin wins.)
+ """
+ job = _base_job(provider="openrouter", provider_snapshot="anthropic")
+ # Current resolution differs from the (stale) snapshot, but the job is
+ # pinned, so the guard must not engage.
+ success, output, final_response, error, agent_constructed = \
+ _run_with_current_provider(job, "nous", tmp_path)
+
+ assert success is True
+ assert error is None
+ assert agent_constructed is True
+
+
+class TestCreateJobSnapshot:
+ """create_job snapshots provider/model only for the axes a job leaves unpinned."""
+
+ @staticmethod
+ def _isolate_storage(monkeypatch):
+ """Patch cron.jobs storage so create_job never touches the real store."""
+ import contextlib
+ import cron.jobs as jobs
+
+ @contextlib.contextmanager
+ def _noop_lock():  # context manager that does nothing on enter/exit
+ yield
+
+ monkeypatch.setattr(jobs, "_jobs_lock", _noop_lock, raising=True)
+ monkeypatch.setattr(jobs, "load_jobs", lambda: [], raising=True)  # always an empty job list
+ monkeypatch.setattr(jobs, "save_jobs", lambda j: None, raising=True)  # discard writes
+ return jobs
+
+ def test_unpinned_job_captures_snapshot(self, monkeypatch):  # resolver result is stored
+ jobs = self._isolate_storage(monkeypatch)
+
+ with patch(
+ "hermes_cli.runtime_provider.resolve_runtime_provider",
+ return_value={"provider": "openrouter"},
+ ):
+ job = jobs.create_job(prompt="do a thing", schedule="every 1 hour")
+
+ assert job["provider"] is None
+ assert job["provider_snapshot"] == "openrouter"
+
+ def test_pinned_job_skips_snapshot(self, monkeypatch):  # explicit provider: no resolution
+ jobs = self._isolate_storage(monkeypatch)
+
+ resolver = MagicMock(return_value={"provider": "openrouter"})
+ with patch("hermes_cli.runtime_provider.resolve_runtime_provider", resolver):
+ job = jobs.create_job(
+ prompt="do a thing", schedule="every 1 hour", provider="nous"
+ )
+
+ # Explicit provider → pinned → no snapshot needed, and resolution skipped.
+ assert job["provider"] == "nous"
+ assert job["provider_snapshot"] is None
+ resolver.assert_not_called()
+
+ def test_snapshot_resolution_error_fails_open_to_none(self, monkeypatch):
+ """If resolution raises at creation, snapshot is None — creation never breaks."""
+ jobs = self._isolate_storage(monkeypatch)
+
+ with patch(
+ "hermes_cli.runtime_provider.resolve_runtime_provider",
+ side_effect=RuntimeError("no creds"),
+ ):
+ job = jobs.create_job(prompt="do a thing", schedule="every 1 hour")
+
+ assert job["provider_snapshot"] is None
+
+ def test_unpinned_model_captures_model_snapshot(self, monkeypatch, tmp_path):
+ """An unpinned model captures config.yaml model.default into model_snapshot."""
+ jobs = self._isolate_storage(monkeypatch)
+ (tmp_path / "config.yaml").write_text("model:\n default: llama-3.3-70b:free\n")
+ monkeypatch.setattr(
+ "cron.jobs.get_hermes_home", lambda: tmp_path, raising=True
+ )
+ with patch(
+ "hermes_cli.runtime_provider.resolve_runtime_provider",
+ return_value={"provider": "openrouter"},
+ ):
+ job = jobs.create_job(prompt="do a thing", schedule="every 1 hour")
+ assert job["model"] is None
+ assert job["model_snapshot"] == "llama-3.3-70b:free"
+
+ def test_pinned_model_skips_model_snapshot(self, monkeypatch, tmp_path):
+ """An explicit model → pinned → no model_snapshot captured."""
+ jobs = self._isolate_storage(monkeypatch)
+ (tmp_path / "config.yaml").write_text("model:\n default: llama-3.3-70b:free\n")
+ monkeypatch.setattr(
+ "cron.jobs.get_hermes_home", lambda: tmp_path, raising=True
+ )
+ with patch(
+ "hermes_cli.runtime_provider.resolve_runtime_provider",
+ return_value={"provider": "openrouter"},
+ ):
+ job = jobs.create_job(
+ prompt="do a thing", schedule="every 1 hour", model="my-model"
+ )
+ assert job["model"] == "my-model"
+ assert job["model_snapshot"] is None
+
+
+def _run_with_current_provider_and_model(job, current_provider, current_model, tmp_path):
+ """Drive run_job with the resolved provider patched and config.yaml model.default
+ set to ``current_model`` (the unpinned-model fire-time source)."""
+ (tmp_path / "config.yaml").write_text(
+ f"model:\n default: {current_model}\n"
+ )
+ fake_db = MagicMock()
+ with patch("cron.scheduler._hermes_home", tmp_path), \
+ patch("cron.scheduler._get_hermes_home", return_value=tmp_path), \
+ patch("cron.scheduler._resolve_origin", return_value=None), \
+ patch("dotenv.load_dotenv"), \
+ patch("hermes_state.SessionDB", return_value=fake_db), \
+ patch(
+ "hermes_cli.runtime_provider.resolve_runtime_provider",
+ return_value={
+ "api_key": "test-key",
+ "base_url": "https://example.invalid/v1",
+ "provider": current_provider,  # the "currently resolved" provider under test
+ "api_mode": "chat_completions",
+ },
+ ), \
+ patch("run_agent.AIAgent") as mock_agent_cls:  # stand-in class: no real agent runs
+ mock_agent = MagicMock()
+ mock_agent.run_conversation.return_value = {"final_response": "ok"}
+ mock_agent_cls.return_value = mock_agent
+ success, output, final_response, error = run_job(job)
+ agent_constructed = mock_agent_cls.called  # True iff the AIAgent mock was called
+ return success, output, final_response, error, agent_constructed  # run_job 4-tuple + flag
+
+
+class TestModelDriftGuard:
+ """#44585 C1: model drift on the SAME provider must also fail closed —
+ the incident named a model (claude-fable-5), and an unpinned job reads
+ config.yaml model.default fresh every tick independently of provider."""
+
+ def test_model_drift_same_provider_fails_closed(self, tmp_path):
+ # Provider unchanged (openrouter==openrouter), but the global default
+ # MODEL swapped to a premium model since creation → must fail closed.
+ job = _base_job(
+ provider_snapshot="openrouter",
+ model_snapshot="llama-3.3-70b-instruct:free",
+ )
+ success, output, final_response, error, agent_constructed = \
+ _run_with_current_provider_and_model(
+ job, "openrouter", "claude-fable-5", tmp_path
+ )
+ assert agent_constructed is False, "paid call must not be made on model drift"
+ assert success is False
+ blob = f"{error}\n{output}".lower()  # both model names must appear in the alert text
+ assert "claude-fable-5" in blob
+ assert "llama-3.3-70b-instruct:free" in blob
+ assert "44585" in blob
+
+ def test_model_snapshot_matches_runs(self, tmp_path):
+ # Default model unchanged → runs normally.
+ job = _base_job(
+ provider_snapshot="openrouter",
+ model_snapshot="llama-3.3-70b-instruct:free",
+ )
+ success, output, final_response, error, agent_constructed = \
+ _run_with_current_provider_and_model(
+ job, "openrouter", "llama-3.3-70b-instruct:free", tmp_path
+ )
+ assert agent_constructed is True
+ assert success is True
+
+ def test_pinned_model_bypasses_guard(self, tmp_path):
+ # Explicit job["model"] → not unpinned → no model-drift skip even if the
+ # global default differs from any snapshot.
+ job = _base_job(
+ provider_snapshot="openrouter",
+ model_snapshot="old-model",
+ model="my-pinned-model",
+ )
+ success, output, final_response, error, agent_constructed = \
+ _run_with_current_provider_and_model(
+ job, "openrouter", "claude-fable-5", tmp_path
+ )
+ assert agent_constructed is True
+ assert success is True
+
+ def test_no_model_snapshot_backcompat(self, tmp_path):
+ # Pre-existing job without model_snapshot → no model-drift skip.
+ job = _base_job(provider_snapshot="openrouter")  # model_snapshot key is absent from the job
+ success, output, final_response, error, agent_constructed = \
+ _run_with_current_provider_and_model(
+ job, "openrouter", "claude-fable-5", tmp_path
+ )
+ assert agent_constructed is True
+ assert success is True
diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md
index 89357043d70..c3444f98d38 100644
--- a/website/docs/user-guide/features/cron.md
+++ b/website/docs/user-guide/features/cron.md
@@ -22,7 +22,7 @@ Cron jobs can:
All of this is available to Hermes itself through the `cronjob` tool, so you can create, pause, edit, and remove jobs by asking in plain language — no CLI required.
:::tip
-Cron jobs use whatever provider `hermes model` selected. `hermes setup --portal` is the lowest-friction option for unattended runs since OAuth refresh is automatic. See [Nous Portal](/integrations/nous-portal).
+An unpinned job (one you don't give an explicit `provider`/`model`) follows the global default selected by `hermes model`, and Hermes **snapshots** that provider and model on the job when it is created. If the global default later changes, the job **fails closed**: it skips the run, makes no inference call, and sends an alert telling you to pin the provider/model explicitly (`cronjob action=update job_id=… provider=… model=…`) to proceed. This prevents an unattended job from silently inheriting a switch to a paid provider/model and spending money you didn't intend (#44585). After you change the global default on purpose, update each unpinned job to pin the new values so it resumes running. Jobs created before this guard existed have no snapshot and keep following the global default without this check. `hermes setup --portal` is the lowest-friction option for unattended runs since OAuth refresh is automatic. See [Nous Portal](/integrations/nous-portal).
:::
:::warning