fix(cron): scope job execution to its owning profile (#32091 follow-up) (#50993)

The #32091 fix moved every profile's cron jobs into one shared root store,
but never wired the execution-scoping half it recommended: a job still ran
under whichever profile's ticker picked it up, not its owning profile. So a
job created under `hermes -p donna` could execute with the root profile's
.env / config.yaml / credentials.

- jobs.py: create_job auto-captures the active profile (explicit profile=
  override available) and stores it on the job; resolve_profile_home() maps a
  profile name to its HERMES_HOME; legacy jobs backfill to 'default'.
- scheduler.py: run_job applies the job's profile via a scoped HERMES_HOME
  override (env var + in-process ContextVar) before any .env/config/script
  load, restored in finally. tick() routes profile-mismatched jobs to the
  single-worker sequential pool so the env mutation can't race.
- cronjob tool threads profile through (NOT exposed in the model schema, to
  avoid cross-profile privilege escalation); hermes cron add gains --profile.

E2E verified against a temp HERMES_HOME with a real profile dir: a root-profile
ticker runs a profile='donna' job with HERMES_HOME=donna during execution and
restores the ticker env afterward.
This commit is contained in:
Teknium 2026-06-22 14:54:28 -07:00 committed by GitHub
parent 15880da8bb
commit 660e36f097
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 265 additions and 6 deletions

View file

@ -248,6 +248,12 @@ def _normalize_job_record(job: Dict[str, Any]) -> Dict[str, Any]:
state = "scheduled" if normalized.get("enabled", True) else "paused"
normalized["state"] = state
# Legacy jobs (created before per-job profile scoping) have no profile
# field. Default them to "default" so the scheduler treats them as
# root-profile jobs — matching their pre-existing behaviour.
prof = normalized.get("profile")
normalized["profile"] = (str(prof).strip() if isinstance(prof, str) and prof.strip() else "default")
return normalized
@ -268,6 +274,43 @@ def _secure_file(path: Path):
pass
def current_profile_name() -> str:
    """Report which profile the job-creating process is running as.

    The name follows the active home directory:

    ``~/.hermes``             -> ``"default"``
    ``~/.hermes/profiles/X``  -> ``"X"``

    ``create_job`` uses this value to tag a new job when the caller passes
    no explicit profile, so the job later executes with its owning
    profile's environment (.env / config.yaml / credentials) no matter
    which profile's ticker pulls it out of the shared root store (#32091).

    Returns:
        The detected profile name, or ``"default"`` when detection yields
        nothing or fails for any reason (best-effort by design).
    """
    fallback = "default"
    try:
        from agent.file_safety import (
            _resolve_active_profile_name as _active_name,
        )
        detected = _active_name()
    except Exception:
        # Detection is best-effort: a missing helper or any lookup error
        # simply means "treat this as the root profile".
        return fallback
    return detected if detected else fallback
def resolve_profile_home(profile_name: Optional[str]) -> Optional[Path]:
    """Map a job's ``profile`` name to the HERMES_HOME it should run under.

    ``"default"`` / empty / ``None`` -> the root home (``get_default_hermes_root()``).
    ``"<name>"``                     -> ``<root>/profiles/<name>``.

    Args:
        profile_name: The profile name stored on the job. Surrounding
            whitespace is ignored.

    Returns:
        The resolved home directory, or ``None`` when the name cannot be
        mapped to an existing profile directory. ``None`` lets the scheduler
        fall back to the ticker's own home and log a warning rather than
        pointing a job at a missing profile.

    A profile name must be a single path component. Names such as ``".."``,
    ``"donna/.."``, ``"../../somewhere"`` or an absolute path are rejected
    (``None``): joining them under ``profiles/`` would resolve to a
    directory outside the profiles folder and let a job pick an arbitrary
    directory as its HERMES_HOME.
    """
    name = (profile_name or "").strip()
    root = get_default_hermes_root()
    if not name or name == "default":
        return root.resolve()

    # Lexical check: ``Path(name).name`` equals ``name`` only when ``name``
    # is one plain component (no separators, no drive/anchor). "." and ".."
    # are single components too, so they are excluded explicitly.
    if name in {".", ".."} or Path(name).name != name:
        return None

    candidate = (root / "profiles" / name).resolve()
    return candidate if candidate.is_dir() else None
def ensure_dirs():
"""Ensure cron directories exist with secure permissions."""
CRON_DIR.mkdir(parents=True, exist_ok=True)
@ -772,6 +815,7 @@ def create_job(
enabled_toolsets: Optional[List[str]] = None,
workdir: Optional[str] = None,
no_agent: bool = False,
profile: Optional[str] = None,
) -> Dict[str, Any]:
"""
Create a new cron job.
@ -816,6 +860,13 @@ def create_job(
and deliver its stdout directly. Empty stdout = silent (no
delivery). Requires ``script`` to be set. Ideal for classic
watchdogs and periodic alerts that don't need LLM reasoning.
profile: Optional Hermes profile name the job should EXECUTE under
(its .env / config.yaml / credentials). Defaults to the active
profile of the session creating the job. The shared root store
holds every profile's jobs (#32091); this field is what scopes
a job's runtime environment to its owning profile so it runs
with that profile's permissions regardless of which ticker
picks it up.
Returns:
The created job dict
@ -850,6 +901,11 @@ def create_job(
normalized_toolsets = normalized_toolsets or None
normalized_workdir = _normalize_workdir(workdir)
normalized_no_agent = bool(no_agent)
# Tag the job with the profile whose environment it should execute under.
# When the caller does not pass one explicitly, capture the active profile
# of the session creating the job so a job created under `hermes -p donna`
# runs as donna even though it now lives in the shared root store (#32091).
normalized_profile = (str(profile).strip() if isinstance(profile, str) else "") or current_profile_name()
# no_agent jobs are meaningless without a script — the script IS the job.
# Surface this as a clear ValueError at create time so bad configs never
@ -903,6 +959,7 @@ def create_job(
"origin": origin, # Tracks where job was created for "origin" delivery
"enabled_toolsets": normalized_toolsets,
"workdir": normalized_workdir,
"profile": normalized_profile,
}
with _jobs_lock():

View file

@ -1857,6 +1857,32 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
os.environ["TERMINAL_CWD"] = _job_workdir
logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
# Scope this job's execution to its owning profile's HERMES_HOME (#32091).
# The shared root store holds every profile's jobs, but a job must run with
# the .env / config.yaml / credentials of the profile that created it — not
# whichever profile's ticker happened to pick it up. We set both the
# in-process ContextVar override (consumed by _get_hermes_home() for the
# config/.env/script loads below) AND os.environ["HERMES_HOME"] (inherited
# by any child subprocess the agent spawns). tick() routes profile-scoped
# jobs to the single-worker sequential pool, so mutating os.environ here is
# safe — they never overlap. Restored in the finally block.
from cron.jobs import resolve_profile_home
from hermes_constants import set_hermes_home_override
_job_profile = (job.get("profile") or "default").strip() or "default"
_profile_home = resolve_profile_home(_job_profile)
_prior_hermes_home = os.environ.get("HERMES_HOME", "_UNSET_")
_hermes_home_token = None
if _profile_home is not None and _profile_home != _get_hermes_home().resolve():
os.environ["HERMES_HOME"] = str(_profile_home)
_hermes_home_token = set_hermes_home_override(str(_profile_home))
logger.info("Job '%s': executing under profile %r (HERMES_HOME=%s)",
job_id, _job_profile, _profile_home)
elif _profile_home is None and _job_profile != "default":
logger.warning(
"Job '%s': profile %r no longer exists — running under the "
"ticker's profile instead", job_id, _job_profile,
)
try:
# Re-read .env and config.yaml fresh every run so provider/key
# changes take effect without a gateway restart.
@ -2268,6 +2294,19 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
os.environ.pop("TERMINAL_CWD", None)
else:
os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
# Restore HERMES_HOME to the ticker's value when this job overrode it
# for profile-scoped execution (#32091). Mirrors the TERMINAL_CWD
# restore above; the sequential pool guarantees no overlap.
if _hermes_home_token is not None:
try:
from hermes_constants import reset_hermes_home_override
reset_hermes_home_override(_hermes_home_token)
except Exception:
pass
if _prior_hermes_home == "_UNSET_":
os.environ.pop("HERMES_HOME", None)
else:
os.environ["HERMES_HOME"] = _prior_hermes_home
# Clean up ContextVar session/delivery state for this job.
clear_session_vars(_ctx_tokens)
for _var_name in _cron_delivery_vars:
@ -2473,12 +2512,26 @@ def tick(verbose: bool = True, adapters=None, loop=None, sync: bool = True) -> i
body."""
return run_one_job(job, adapters=adapters, loop=loop, verbose=verbose)
# Partition due jobs: those with a per-job workdir mutate
# os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
# so they MUST run sequentially to avoid corrupting each other. Jobs
# without a workdir leave env untouched and stay parallel-safe.
sequential_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
# Partition due jobs: those that mutate process-global os.environ
# inside run_job MUST run sequentially to avoid corrupting each other.
# Two cases mutate env:
# - a per-job workdir sets os.environ["TERMINAL_CWD"].
# - a per-job profile whose HERMES_HOME differs from the ticker's
# sets os.environ["HERMES_HOME"] to scope execution (#32091).
# Jobs that need neither leave env untouched and stay parallel-safe.
def _needs_sequential(j: dict) -> bool:
if (j.get("workdir") or "").strip():
return True
prof = (j.get("profile") or "default").strip() or "default"
try:
from cron.jobs import resolve_profile_home
phome = resolve_profile_home(prof)
except Exception:
phome = None
return phome is not None and phome != _get_hermes_home().resolve()
sequential_jobs = [j for j in due_jobs if _needs_sequential(j)]
parallel_jobs = [j for j in due_jobs if not _needs_sequential(j)]
_results: list = []
_all_futures: list = []

View file

@ -120,6 +120,9 @@ def cron_list(show_all: bool = False):
workdir = job.get("workdir")
if workdir:
print(f" Workdir: {workdir}")
_prof = job.get("profile")
if _prof and _prof != "default":
print(f" Profile: {_prof}")
# Execution history
last_status = job.get("last_status")
@ -259,6 +262,7 @@ def cron_create(args):
script=getattr(args, "script", None),
workdir=getattr(args, "workdir", None),
no_agent=getattr(args, "no_agent", False) or None,
profile=getattr(args, "profile", None),
)
if not result.get("success"):
print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
@ -275,6 +279,9 @@ def cron_create(args):
print(" Mode: no-agent (script stdout delivered directly)")
if job_data.get("workdir"):
print(f" Workdir: {job_data['workdir']}")
_prof = job_data.get("profile")
if _prof and _prof != "default":
print(f" Profile: {_prof}")
print(f" Next run: {result['next_run_at']}")
return 0

View file

@ -70,6 +70,10 @@ def build_cron_parser(subparsers, *, cmd_cron: Callable) -> None:
"--workdir",
help="Absolute path for the job to run from. Injects AGENTS.md / CLAUDE.md / .cursorrules from that directory and uses it as the cwd for terminal/file/code_exec tools. Omit to preserve old behaviour (no project context files).",
)
cron_create.add_argument(
"--profile",
help="Hermes profile the job should EXECUTE under (its .env / config.yaml / credentials). Defaults to the profile that created the job. Jobs live in one shared root store (#32091); this scopes a job's runtime environment to the named profile so it runs with that profile's permissions.",
)
# cron edit
cron_edit = cron_subparsers.add_parser(

View file

@ -103,3 +103,139 @@ def test_get_default_hermes_root_docker_layouts(tmp_path, monkeypatch):
# Docker profile layout: <custom>/profiles/<name> -> <custom>.
monkeypatch.setenv("HERMES_HOME", "/opt/data/profiles/coder")
assert hermes_constants.get_default_hermes_root() == Path("/opt/data")
# ---------------------------------------------------------------------------
# Per-job profile EXECUTION scoping (#32091 follow-up).
#
# The storage half of #32091 (above) moved every profile's jobs into one shared
# root store. But a job must still EXECUTE under its owning profile's
# environment (.env / config.yaml / credentials) — not whichever profile's
# ticker picks it up. These tests cover the execution-scoping half.
# ---------------------------------------------------------------------------
def _profile_env(tmp_path, monkeypatch, active="default"):
    """Build a throwaway root home that contains a 'donna' profile.

    Creates ``<tmp>/hermes_home/cron`` and
    ``<tmp>/hermes_home/profiles/donna/cron``, patches the platform default
    home to the new root, and sets HERMES_HOME to the root (when ``active``
    is ``"default"``) or to donna's home (any other value).

    Returns:
        ``(root, donna_home)``.
    """
    root = tmp_path / "hermes_home"
    donna_home = root / "profiles" / "donna"
    for home in (root, donna_home):
        (home / "cron").mkdir(parents=True)

    import hermes_constants

    monkeypatch.setattr(
        hermes_constants, "_get_platform_default_hermes_home", lambda: root
    )
    active_home = root if active == "default" else donna_home
    monkeypatch.setenv("HERMES_HOME", str(active_home))
    return root, donna_home
def test_create_job_autocaptures_active_profile(tmp_path, monkeypatch):
    """Creating a job inside a profile session tags it with that profile.

    The tag is what later lets the scheduler scope execution back to the
    owning profile; the job itself must still land in the shared root store.
    """
    root, donna_home = _profile_env(tmp_path, monkeypatch, active="donna")
    import cron.jobs as jobs
    importlib.reload(jobs)
    try:
        created = jobs.create_job(prompt="audit", schedule="every 1h", name="a")

        # The profile was picked up from the active (donna) session.
        assert created["profile"] == "donna"

        # Storage stays in the SHARED ROOT store, not donna's local one.
        shared_store = root / "cron" / "jobs.json"
        profile_store = donna_home / "cron" / "jobs.json"
        assert jobs.JOBS_FILE.resolve() == shared_store.resolve()
        assert jobs.JOBS_FILE.exists()
        assert not profile_store.exists()
    finally:
        # Drop the env patches first so the reload rebinds real paths.
        monkeypatch.undo()
        importlib.reload(jobs)
def test_create_job_explicit_profile_override(tmp_path, monkeypatch):
    """A caller-supplied ``profile=`` beats the auto-captured active one."""
    root, donna_home = _profile_env(tmp_path, monkeypatch, active="default")
    ops_cron_dir = root / "profiles" / "ops" / "cron"
    ops_cron_dir.mkdir(parents=True)
    import cron.jobs as jobs
    importlib.reload(jobs)
    try:
        created = jobs.create_job(prompt="x", schedule="every 2h", profile="ops")
        assert created["profile"] == "ops"
    finally:
        # Drop the env patches first so the reload rebinds real paths.
        monkeypatch.undo()
        importlib.reload(jobs)
def test_resolve_profile_home_maps_names(tmp_path, monkeypatch):
    """resolve_profile_home: root aliases -> root, named profile -> its
    home, unknown profile -> None."""
    root, donna_home = _profile_env(tmp_path, monkeypatch, active="default")
    import cron.jobs as jobs
    importlib.reload(jobs)
    try:
        expected_root = root.resolve()
        for root_alias in ("default", ""):
            assert jobs.resolve_profile_home(root_alias).resolve() == expected_root

        resolved_donna = jobs.resolve_profile_home("donna")
        assert resolved_donna.resolve() == donna_home.resolve()

        assert jobs.resolve_profile_home("ghost") is None
    finally:
        # Drop the env patches first so the reload rebinds real paths.
        monkeypatch.undo()
        importlib.reload(jobs)
def test_normalize_backfills_legacy_profile_to_default(tmp_path, monkeypatch):
    """A job record written before profile scoping existed (no ``profile``
    key) is normalized to profile ``"default"``."""
    import cron.jobs as jobs

    legacy_record = {
        "id": "l1",
        "name": "old",
        "prompt": "x",
        "schedule": {"kind": "interval", "minutes": 60},
    }
    normalized = jobs._normalize_job_record(legacy_record)
    assert normalized["profile"] == "default"
def test_run_job_scopes_execution_to_job_profile(tmp_path, monkeypatch):
    """End-to-end check of profile-scoped execution.

    The ticker runs as the ROOT profile and executes a job tagged
    ``profile='donna'``. While the agent runs, HERMES_HOME must point at
    donna's home through both channels (env var and in-process override);
    once run_job returns, the ticker's own env value must be back.
    """
    from unittest.mock import MagicMock, patch

    root, donna_home = _profile_env(tmp_path, monkeypatch, active="default")
    (donna_home / "config.yaml").write_text("model:\n  default: openrouter/test\n")

    import hermes_constants
    import cron.jobs as jobs
    import cron.scheduler as sched
    importlib.reload(jobs)
    importlib.reload(sched)

    # Snapshot of what the (fake) agent observes mid-run.
    captured = {}

    def fake_run_conversation(prompt, *a, **k):
        captured["env"] = os.environ.get("HERMES_HOME")
        captured["override"] = hermes_constants.get_hermes_home_override()
        captured["resolved"] = str(hermes_constants.get_hermes_home())
        return {
            "final_response": "done",
            "completed": True,
            "failed": False,
            "turn_exit_reason": "text_response(finish_reason=stop)",
        }

    job = {
        "id": "j-donna",
        "name": "donna-audit",
        "prompt": "audit",
        "profile": "donna",
        "schedule": {"kind": "interval", "minutes": 60},
        "deliver": "local",
        "model": "openrouter/test",
    }
    runtime_provider = {
        "api_key": "k",
        "base_url": "https://x/v1",
        "provider": "openrouter",
        "api_mode": "chat_completions",
    }

    before = os.environ.get("HERMES_HOME")
    try:
        fake_agent = MagicMock()
        fake_agent.run_conversation.side_effect = fake_run_conversation
        with patch("cron.scheduler._resolve_origin", return_value=None), \
                patch("dotenv.load_dotenv"), \
                patch("hermes_state.SessionDB", return_value=MagicMock()), \
                patch("hermes_cli.runtime_provider.resolve_runtime_provider",
                      return_value=runtime_provider), \
                patch("run_agent.AIAgent", return_value=fake_agent):
            success, output, final, err = sched.run_job(job)

        assert success is True, (success, err)

        # During execution the job ran AS donna, via every channel.
        expected_home = str(donna_home)
        assert captured["env"] == expected_home
        assert captured["override"] == expected_home
        assert captured["resolved"] == expected_home

        # After the job, the ticker's HERMES_HOME is restored (no leak).
        assert os.environ.get("HERMES_HOME") == before
    finally:
        # Drop the env patches first so the reloads rebind real paths.
        monkeypatch.undo()
        importlib.reload(jobs)
        importlib.reload(sched)

View file

@ -539,6 +539,7 @@ def cronjob(
enabled_toolsets: Optional[List[str]] = None,
workdir: Optional[str] = None,
no_agent: Optional[bool] = None,
profile: Optional[str] = None,
task_id: str = None,
) -> str:
"""Unified cron job management tool."""
@ -605,6 +606,7 @@ def cronjob(
enabled_toolsets=enabled_toolsets or None,
workdir=_normalize_optional_job_value(workdir),
no_agent=_no_agent,
profile=_normalize_optional_job_value(profile),
)
_notify_provider_jobs_changed_safe()
return json.dumps(