diff --git a/cron/jobs.py b/cron/jobs.py index 6ec6d5be123..fd7bbd8a8a2 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -755,6 +755,45 @@ def _normalize_workdir(workdir: Optional[str]) -> Optional[str]: return str(resolved) +def _resolve_default_model_snapshot() -> Optional[str]: + """Resolve the global default model the same way the cron ticker does. + + Mirrors the unpinned-model resolution in ``cron/scheduler.py`` ``run_job``: + read ``config.yaml`` ``model.default`` (or the ``model`` alias / bare string + form), applying the managed-scope overlay and env expansion. Used by + ``create_job`` to snapshot the default model for unpinned jobs so a later + swap of the global default is detected at fire time (#44585). + + Returns the resolved model string, or ``None`` if config is missing/empty + or resolution fails (fail-open — caller treats ``None`` as "no snapshot"). + """ + try: + import yaml + from hermes_cli.config import _expand_env_vars + + cfg_path = get_hermes_home() / "config.yaml" + if not cfg_path.exists(): + return None + with cfg_path.open(encoding="utf-8") as f: + cfg = yaml.safe_load(f) or {} + try: + from hermes_cli import managed_scope + cfg = managed_scope.apply_managed_overlay(cfg) + except Exception: + pass + cfg = _expand_env_vars(cfg) + model_cfg = cfg.get("model") or {} + if isinstance(model_cfg, str): + return model_cfg.strip() or None + if isinstance(model_cfg, dict): + default = model_cfg.get("default") or model_cfg.get("model") + if isinstance(default, str): + return default.strip() or None + return None + except Exception: + return None + + def create_job( prompt: Optional[str], schedule: str, @@ -870,6 +909,47 @@ def create_job( prompt_text = _coerce_job_text(prompt) label_source = (prompt_text or (normalized_skills[0] if normalized_skills else None) or (normalized_script if normalized_no_agent else None)) or "cron job" + + # Provider/model-drift guard (#44585). 
When the caller does NOT pin a + # provider and/or model, the job follows the global default — model.default + # in config.yaml and whatever resolve_runtime_provider() picks at fire time. + # That global state can change (e.g. a temporary switch to a paid provider + # OR a paid model like claude-fable-5 on the SAME provider), and an unpinned + # job would then silently inherit it and spend real money. To detect that, + # snapshot what resolution WOULD pick *right now*, at creation, for each + # axis the job leaves unpinned. The fire-time guard (run_job) fails closed + # when an unpinned job's currently-resolved provider OR model differs from + # its snapshot. + # + # Only captured for agent-backed jobs (no_agent script jobs make no paid + # inference). Each axis is snapshotted only when that axis is unpinned — + # a pinned provider/model doesn't drift with global state. Fail-open to None + # on any resolution error so job creation never breaks; a missing snapshot + # preserves the legacy no-guard behaviour for that axis. + provider_snapshot: Optional[str] = None + model_snapshot: Optional[str] = None + if not normalized_no_agent: + if normalized_provider is None: + try: + from hermes_cli.runtime_provider import resolve_runtime_provider + _runtime_kwargs = {"requested": None} + if normalized_base_url: + _runtime_kwargs["explicit_base_url"] = normalized_base_url + _snap = resolve_runtime_provider(**_runtime_kwargs) + _snap_provider = str(_snap.get("provider") or "").strip().lower() + provider_snapshot = _snap_provider or None + except Exception: + provider_snapshot = None + if normalized_model is None: + # Mirror the fire-time unpinned-model resolution (run_job reads + # config.yaml model.default / model). Capture that value so a later + # swap of the global default model is detected even when the + # provider is unchanged (e.g. a premium model on the same endpoint). 
+ try: + model_snapshot = _resolve_default_model_snapshot() or None + except Exception: + model_snapshot = None + job = { "id": job_id, "name": name or label_source[:50].strip(), @@ -878,6 +958,11 @@ def create_job( "skill": normalized_skills[0] if normalized_skills else None, "model": normalized_model, "provider": normalized_provider, + # Provider/model resolution captured at creation for unpinned jobs + # (#44585). None for pinned axes, no_agent jobs, resolution failures, and + # any pre-existing job written before these fields existed (back-compat). + "provider_snapshot": provider_snapshot, + "model_snapshot": model_snapshot, "base_url": normalized_base_url, "script": normalized_script, "no_agent": normalized_no_agent, diff --git a/cron/scheduler.py b/cron/scheduler.py index c48935c84a6..b3c3f95c5b7 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -2016,6 +2016,60 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: message = format_runtime_provider_error(exc) raise RuntimeError(message) from exc + # Provider/model-drift fail-closed guard (#44585). + # + # An UNPINNED job (no explicit job["provider"]/["model"]) follows the + # global default, which can change after the job was created — a switch + # to a paid PROVIDER (e.g. nous) OR a paid MODEL on the same provider + # (e.g. claude-fable-5 on openrouter). Without a guard the job would + # silently inherit that change and spend real money on every tick — the + # $7.73 incident named BOTH a provider and a model. + # + # create_job() snapshots whatever resolution would have picked at + # creation for each unpinned axis (job["provider_snapshot"] / + # job["model_snapshot"]). Here, for each axis that (a) has a snapshot and + # (b) is unpinned and (c) currently resolves to a DIFFERENT value, we + # fail closed: skip this run, make NO paid call, and deliver a loud, + # actionable alert telling the user to pin the axis explicitly. 
+ # + # Back-compat: an axis with no snapshot (pre-existing jobs, no_agent, or + # any axis whose creation-time resolution failed) behaves exactly as + # before — the guard never engages for it. Pinned axes are unaffected. + _drift: list[str] = [] + _provider_snapshot = (job.get("provider_snapshot") or "").strip().lower() + if _provider_snapshot and not (job.get("provider") or "").strip(): + _current_provider = str(runtime.get("provider") or "").strip().lower() + if _current_provider and _current_provider != _provider_snapshot: + _drift.append( + f"provider '{_provider_snapshot}' -> '{_current_provider}'" + ) + _model_snapshot = (job.get("model_snapshot") or "").strip().lower() + if _model_snapshot and not (job.get("model") or "").strip(): + _current_model = str(model or "").strip().lower() + if _current_model and _current_model != _model_snapshot: + _drift.append( + f"model '{_model_snapshot}' -> '{_current_model}'" + ) + if _drift: + _changes = "; ".join(_drift) + logger.warning( + "Job '%s': SKIPPED — global inference config drifted since " + "creation (%s) and this job is unpinned. Skipped to prevent " + "unintended spend. Pin explicitly to proceed: " + "`cronjob action=update job_id=%s provider=

model=`.", + job_id, + _changes, + job_id, + ) + raise RuntimeError( + f"Skipped to prevent unintended spend: global inference config " + f"drifted since this job was created ({_changes}), and this job " + f"is unpinned. No inference call was made. To run on the new " + f"config, pin it explicitly: `cronjob action=update " + f"job_id={job_id} provider= model=` " + f"(or pin the original values to keep them). See #44585." + ) + fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None credential_pool = None runtime_provider = str(runtime.get("provider") or "").strip().lower() diff --git a/tests/cron/test_cron_provider_pin.py b/tests/cron/test_cron_provider_pin.py new file mode 100644 index 00000000000..e5d06cc212d --- /dev/null +++ b/tests/cron/test_cron_provider_pin.py @@ -0,0 +1,334 @@ +"""Provider-drift fail-closed guard for cron jobs (#44585). + +Background: an UNPINNED cron job follows the global default provider. If that +global state is changed (e.g. a temporary switch to a paid provider like +nous/claude-fable-5), the job would silently inherit it on its next tick and +spend real money — the $7.73 incident. + +The fix has two halves: + - create_job() snapshots the provider resolution WOULD pick at creation into + job["provider_snapshot"] (only for unpinned, agent-backed jobs). + - run_job() fails closed when an unpinned job's CURRENTLY-resolved provider + differs from that snapshot: it skips the run, makes no paid call, and + delivers a loud actionable error. + +These tests exercise the full run_job path (real imports, mocked AIAgent + +resolve_runtime_provider against a temp HERMES_HOME) and the create_job +snapshot capture. They are load-bearing: without the guard, cases (b) call the +agent and "succeed" instead of failing closed. +""" + +import sys +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +# Ensure project root is importable. 
+sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from cron.scheduler import run_job + + +def _base_job(**overrides): + job = { + "id": "pin-test", + "name": "pin test", + "prompt": "hello", + "model": None, + "provider": None, + "provider_snapshot": None, + "base_url": None, + } + job.update(overrides) + return job + + +def _run_with_current_provider(job, current_provider, tmp_path): + """Drive run_job with resolve_runtime_provider pinned to ``current_provider``. + + Returns (success, output, final_response, error, agent_constructed). + """ + fake_db = MagicMock() + with patch("cron.scheduler._hermes_home", tmp_path), \ + patch("cron.scheduler._resolve_origin", return_value=None), \ + patch("dotenv.load_dotenv"), \ + patch("hermes_state.SessionDB", return_value=fake_db), \ + patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + return_value={ + "api_key": "test-key", + "base_url": "https://example.invalid/v1", + "provider": current_provider, + "api_mode": "chat_completions", + }, + ), \ + patch("run_agent.AIAgent") as mock_agent_cls: + mock_agent = MagicMock() + mock_agent.run_conversation.return_value = {"final_response": "ok"} + mock_agent_cls.return_value = mock_agent + + success, output, final_response, error = run_job(job) + agent_constructed = mock_agent_cls.called + + return success, output, final_response, error, agent_constructed + + +class TestProviderDriftGuard: + def test_a_unpinned_snapshot_matches_runs_normally(self, tmp_path): + """(a) Unpinned job whose snapshot == current provider → runs normally.""" + job = _base_job(provider_snapshot="openrouter") + success, output, final_response, error, agent_constructed = \ + _run_with_current_provider(job, "openrouter", tmp_path) + + assert success is True + assert error is None + assert final_response == "ok" + assert agent_constructed is True + + def test_b_unpinned_snapshot_differs_fails_closed(self, tmp_path): + """(b) Unpinned job whose snapshot != current provider → fail 
closed. + + The paid call must NOT be made (AIAgent never constructed) and the + delivered error must name both providers and tell the user to pin. + """ + job = _base_job(provider_snapshot="openrouter") + success, output, final_response, error, agent_constructed = \ + _run_with_current_provider(job, "nous", tmp_path) + + # Fail closed: no agent constructed, no inference call. + assert agent_constructed is False + assert success is False + assert error is not None + + # Loud + actionable: names both providers, mentions spend + pinning. + blob = f"{error}\n{output}".lower() + assert "openrouter" in blob + assert "nous" in blob + assert "spend" in blob + assert "cronjob action=update" in blob + assert "44585" in blob + + def test_c_no_snapshot_runs_backcompat(self, tmp_path): + """(c) Pre-existing job with NO provider_snapshot → runs (back-compat). + + Even though the current provider differs from anything, a job without a + snapshot must behave exactly as before this fix: the guard never engages. + """ + # A job dict that predates the field entirely (key absent, not None). + job = _base_job() + job.pop("provider_snapshot", None) + success, output, final_response, error, agent_constructed = \ + _run_with_current_provider(job, "nous", tmp_path) + + assert success is True + assert error is None + assert agent_constructed is True + + def test_c2_snapshot_none_runs_backcompat(self, tmp_path): + """(c') Job with provider_snapshot explicitly None → runs (back-compat).""" + job = _base_job(provider_snapshot=None) + success, output, final_response, error, agent_constructed = \ + _run_with_current_provider(job, "nous", tmp_path) + + assert success is True + assert error is None + assert agent_constructed is True + + def test_d_explicitly_pinned_runs_regardless_of_drift(self, tmp_path): + """(d) Explicitly-pinned job (job["provider"] set) → runs regardless. + + A pinned job does not follow global state, so even a snapshot/current + mismatch must not skip it. 
(Snapshot would normally be None for pinned + jobs, but we set a mismatching one to prove the pin wins.) + """ + job = _base_job(provider="openrouter", provider_snapshot="anthropic") + # Current resolution differs from the (stale) snapshot, but the job is + # pinned, so the guard must not engage. + success, output, final_response, error, agent_constructed = \ + _run_with_current_provider(job, "nous", tmp_path) + + assert success is True + assert error is None + assert agent_constructed is True + + +class TestCreateJobSnapshot: + """create_job captures provider_snapshot for unpinned agent jobs only.""" + + @staticmethod + def _isolate_storage(monkeypatch): + """Patch cron.jobs storage so create_job never touches the real store.""" + import contextlib + import cron.jobs as jobs + + @contextlib.contextmanager + def _noop_lock(): + yield + + monkeypatch.setattr(jobs, "_jobs_lock", _noop_lock, raising=True) + monkeypatch.setattr(jobs, "load_jobs", lambda: [], raising=True) + monkeypatch.setattr(jobs, "save_jobs", lambda j: None, raising=True) + return jobs + + def test_unpinned_job_captures_snapshot(self, monkeypatch): + jobs = self._isolate_storage(monkeypatch) + + with patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + return_value={"provider": "openrouter"}, + ): + job = jobs.create_job(prompt="do a thing", schedule="every 1 hour") + + assert job["provider"] is None + assert job["provider_snapshot"] == "openrouter" + + def test_pinned_job_skips_snapshot(self, monkeypatch): + jobs = self._isolate_storage(monkeypatch) + + resolver = MagicMock(return_value={"provider": "openrouter"}) + with patch("hermes_cli.runtime_provider.resolve_runtime_provider", resolver): + job = jobs.create_job( + prompt="do a thing", schedule="every 1 hour", provider="nous" + ) + + # Explicit provider → pinned → no snapshot needed, and resolution skipped. 
def _run_with_current_provider_and_model(job, current_provider, current_model, tmp_path):
    """Run ``run_job`` against a temp home whose default model is ``current_model``.

    Writes a ``config.yaml`` with ``model.default`` set to ``current_model``
    into ``tmp_path``, points the scheduler's home lookups at ``tmp_path``,
    stubs ``resolve_runtime_provider`` so it reports ``current_provider``, and
    replaces ``AIAgent`` with a mock so no real inference can happen.

    Returns:
        ``(success, output, final_response, error, agent_constructed)`` where
        the first four items are ``run_job``'s result and ``agent_constructed``
        is True when the mocked ``AIAgent`` class was called.
    """
    config_file = tmp_path / "config.yaml"
    config_file.write_text(f"model:\n default: {current_model}\n")

    # Everything the patches hand out is built up front so the ``with``
    # statement below only lists what is being replaced.
    runtime_stub = {
        "api_key": "test-key",
        "base_url": "https://example.invalid/v1",
        "provider": current_provider,
        "api_mode": "chat_completions",
    }
    session_db = MagicMock()
    agent_stub = MagicMock()
    agent_stub.run_conversation.return_value = {"final_response": "ok"}

    with patch("cron.scheduler._hermes_home", tmp_path), \
            patch("cron.scheduler._get_hermes_home", return_value=tmp_path), \
            patch("cron.scheduler._resolve_origin", return_value=None), \
            patch("dotenv.load_dotenv"), \
            patch("hermes_state.SessionDB", return_value=session_db), \
            patch(
                "hermes_cli.runtime_provider.resolve_runtime_provider",
                return_value=runtime_stub,
            ), \
            patch("run_agent.AIAgent", return_value=agent_stub) as agent_cls:
        result = run_job(job)
        # Read inside the context: this is the mock that stood in for AIAgent.
        agent_constructed = agent_cls.called

    success, output, final_response, error = result
    return success, output, final_response, error, agent_constructed
+ job = _base_job( + provider_snapshot="openrouter", + model_snapshot="llama-3.3-70b-instruct:free", + ) + success, output, final_response, error, agent_constructed = \ + _run_with_current_provider_and_model( + job, "openrouter", "claude-fable-5", tmp_path + ) + assert agent_constructed is False, "paid call must not be made on model drift" + assert success is False + blob = f"{error}\n{output}".lower() + assert "claude-fable-5" in blob + assert "llama-3.3-70b-instruct:free" in blob + assert "44585" in blob + + def test_model_snapshot_matches_runs(self, tmp_path): + # Default model unchanged → runs normally. + job = _base_job( + provider_snapshot="openrouter", + model_snapshot="llama-3.3-70b-instruct:free", + ) + success, output, final_response, error, agent_constructed = \ + _run_with_current_provider_and_model( + job, "openrouter", "llama-3.3-70b-instruct:free", tmp_path + ) + assert agent_constructed is True + assert success is True + + def test_pinned_model_bypasses_guard(self, tmp_path): + # Explicit job["model"] → not unpinned → no model-drift skip even if the + # global default differs from any snapshot. + job = _base_job( + provider_snapshot="openrouter", + model_snapshot="old-model", + model="my-pinned-model", + ) + success, output, final_response, error, agent_constructed = \ + _run_with_current_provider_and_model( + job, "openrouter", "claude-fable-5", tmp_path + ) + assert agent_constructed is True + assert success is True + + def test_no_model_snapshot_backcompat(self, tmp_path): + # Pre-existing job without model_snapshot → no model-drift skip. 
+ job = _base_job(provider_snapshot="openrouter") # no model_snapshot key set to a value + success, output, final_response, error, agent_constructed = \ + _run_with_current_provider_and_model( + job, "openrouter", "claude-fable-5", tmp_path + ) + assert agent_constructed is True + assert success is True diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md index 89357043d70..c3444f98d38 100644 --- a/website/docs/user-guide/features/cron.md +++ b/website/docs/user-guide/features/cron.md @@ -22,7 +22,7 @@ Cron jobs can: All of this is available to Hermes itself through the `cronjob` tool, so you can create, pause, edit, and remove jobs by asking in plain language — no CLI required. :::tip -Cron jobs use whatever provider `hermes model` selected. `hermes setup --portal` is the lowest-friction option for unattended runs since OAuth refresh is automatic. See [Nous Portal](/integrations/nous-portal). +At creation, an unpinned job (one you don't give an explicit `provider`/`model`) follows the global default selected by `hermes model` — and Hermes **snapshots** that provider and model on the job. If the global default later changes, the job **fails closed**: it skips the run, makes no inference call, and sends an alert telling you to pin the provider/model explicitly (`cronjob action=update job_id=… provider=… model=…`) to proceed. This prevents an unattended job from silently inheriting a switch to a paid provider/model and spending money you didn't intend (#44585). An unpinned job never follows a changed default on its own: to move a job onto the new default deliberately, pin it to the new provider/model after you change the default. `hermes setup --portal` is the lowest-friction option for unattended runs since OAuth refresh is automatic. See [Nous Portal](/integrations/nous-portal). ::: :::warning