revert(cron): return cron job storage to per-profile (reverts #32117 + #50993) (#51116)

* Revert "fix(cron): scope job execution to its owning profile (#32091 follow-up) (#50993)"

This reverts commit 660e36f097.

* Revert "fix(cron): anchor cron storage at the default root home (not the active profile)"

This reverts commit a5c09fd176.
This commit is contained in:
Teknium 2026-06-22 17:53:50 -07:00 committed by GitHub
parent 2a10b8384a
commit bb7ff7dc30
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 14 additions and 423 deletions

View file

@ -31,7 +31,7 @@ except ImportError: # pragma: no cover - non-Windows
msvcrt = None
from datetime import datetime, timedelta
from pathlib import Path
from hermes_constants import get_default_hermes_root, get_hermes_home
from hermes_constants import get_hermes_home
from typing import Optional, Dict, List, Any, Union
logger = logging.getLogger(__name__)
@ -49,7 +49,7 @@ except ImportError:
# Configuration
# =============================================================================
HERMES_DIR = get_default_hermes_root().resolve()
HERMES_DIR = get_hermes_home().resolve()
CRON_DIR = HERMES_DIR / "cron"
JOBS_FILE = CRON_DIR / "jobs.json"
# Heartbeat file the in-process ticker touches on every loop iteration. The
@ -248,12 +248,6 @@ def _normalize_job_record(job: Dict[str, Any]) -> Dict[str, Any]:
state = "scheduled" if normalized.get("enabled", True) else "paused"
normalized["state"] = state
# Legacy jobs (created before per-job profile scoping) have no profile
# field. Default them to "default" so the scheduler treats them as
# root-profile jobs — matching their pre-existing behaviour.
prof = normalized.get("profile")
normalized["profile"] = (str(prof).strip() if isinstance(prof, str) and prof.strip() else "default")
return normalized
@ -274,43 +268,6 @@ def _secure_file(path: Path):
pass
def current_profile_name() -> str:
    """Return the name of the profile active in the job-creating process.

    The lookup is delegated to
    ``agent.file_safety._resolve_active_profile_name``. According to the
    contract this function has always advertised (the helper itself is not
    visible here, so treat the mapping as the expected behaviour):

        ``~/.hermes``             -> ``"default"``
        ``~/.hermes/profiles/X``  -> ``"X"``

    The result is used at job-creation time to tag the job with the profile
    whose environment (.env / config.yaml / credentials) it should execute
    under (#32091).

    Returns:
        The helper's result when it is truthy; ``"default"`` when the helper
        returns a falsy value, cannot be imported, or raises.
    """
    fallback = "default"
    try:
        # Imported lazily; an import failure is handled like any other
        # lookup failure and yields the fallback name.
        from agent.file_safety import _resolve_active_profile_name as _lookup

        detected = _lookup()
        if detected:
            return detected
        return fallback
    except Exception:
        return fallback
def resolve_profile_home(profile_name: Optional[str]) -> Optional[Path]:
    """Map a job's ``profile`` name to the HERMES_HOME it should run under.

    ``"default"`` / empty / ``None`` -> the root home
    (``get_default_hermes_root()``, resolved).
    ``"<name>"`` -> ``<root>/profiles/<name>``, resolved.

    Args:
        profile_name: The ``profile`` value stored on a job record.
            Surrounding whitespace is ignored.

    Returns:
        The resolved home directory, or ``None`` when the name is not a
        plain single-component profile name or the profile directory does
        not exist. Callers are expected to fall back to their own home (and
        warn) on ``None`` rather than pointing a job at a missing profile.
    """
    name = (profile_name or "").strip()
    if not name or name == "default":
        return get_default_hermes_root().resolve()

    # The name comes from a persisted job record and is joined into a
    # filesystem path. Anything that is not a single path component
    # ("../x", "a/b", an absolute path, "." / "..") could resolve outside
    # <root>/profiles and hand the job an arbitrary directory as its
    # HERMES_HOME, so treat such names as "no such profile".
    if name in {".", ".."} or Path(name).name != name:
        return None

    candidate = (get_default_hermes_root() / "profiles" / name).resolve()
    return candidate if candidate.is_dir() else None
def ensure_dirs():
"""Ensure cron directories exist with secure permissions."""
CRON_DIR.mkdir(parents=True, exist_ok=True)
@ -658,44 +615,10 @@ def get_ticker_success_age() -> Optional[float]:
# Job CRUD Operations
# =============================================================================
_WARNED_ORPHAN_STORE = False
def _warn_if_orphaned_profile_store() -> None:
"""Loudly warn (once) if the root store is empty but a profile-local
jobs.json exists from before #32091's root-anchoring fix.
Such a file is now unreachable (the store anchors at the default root, not
the active profile). The jobs in it were already orphaned pre-fix (the
profile-less gateway never read them), so this is not a regression but a
user who could SEE them in `cron list` under their profile would otherwise
find them silently gone. Point them at the path instead of failing silent.
"""
global _WARNED_ORPHAN_STORE
if _WARNED_ORPHAN_STORE:
return
try:
active = get_hermes_home().resolve()
if active == HERMES_DIR:
return # not in a profile; nothing could be orphaned
legacy = active / "cron" / "jobs.json"
if legacy.exists():
_WARNED_ORPHAN_STORE = True
logger.warning(
"Cron jobs now live at %s (shared across profiles). A legacy "
"profile-local store exists at %s and is no longer read; "
"re-create those jobs or move them into the root store. (#32091)",
JOBS_FILE, legacy,
)
except Exception:
pass # best-effort advisory; never block load_jobs
def load_jobs() -> List[Dict[str, Any]]:
"""Load all jobs from storage."""
ensure_dirs()
if not JOBS_FILE.exists():
_warn_if_orphaned_profile_store()
return []
_strict_retry = False # track whether we used the strict=False fallback
@ -815,7 +738,6 @@ def create_job(
enabled_toolsets: Optional[List[str]] = None,
workdir: Optional[str] = None,
no_agent: bool = False,
profile: Optional[str] = None,
) -> Dict[str, Any]:
"""
Create a new cron job.
@ -860,13 +782,6 @@ def create_job(
and deliver its stdout directly. Empty stdout = silent (no
delivery). Requires ``script`` to be set. Ideal for classic
watchdogs and periodic alerts that don't need LLM reasoning.
profile: Optional Hermes profile name the job should EXECUTE under
(its .env / config.yaml / credentials). Defaults to the active
profile of the session creating the job. The shared root store
holds every profile's jobs (#32091); this field is what scopes
a job's runtime environment to its owning profile so it runs
with that profile's permissions regardless of which ticker
picks it up.
Returns:
The created job dict
@ -901,11 +816,6 @@ def create_job(
normalized_toolsets = normalized_toolsets or None
normalized_workdir = _normalize_workdir(workdir)
normalized_no_agent = bool(no_agent)
# Tag the job with the profile whose environment it should execute under.
# When the caller does not pass one explicitly, capture the active profile
# of the session creating the job so a job created under `hermes -p donna`
# runs as donna even though it now lives in the shared root store (#32091).
normalized_profile = (str(profile).strip() if isinstance(profile, str) else "") or current_profile_name()
# no_agent jobs are meaningless without a script — the script IS the job.
# Surface this as a clear ValueError at create time so bad configs never
@ -959,7 +869,6 @@ def create_job(
"origin": origin, # Tracks where job was created for "origin" delivery
"enabled_toolsets": normalized_toolsets,
"workdir": normalized_workdir,
"profile": normalized_profile,
}
with _jobs_lock():

View file

@ -316,17 +316,9 @@ def _get_hermes_home() -> Path:
def _get_lock_paths() -> tuple[Path, Path]:
"""Resolve cron lock paths at call time so profile/env changes are honored.
Anchored on the DEFAULT ROOT home (not the active profile), matching the
jobs store in cron.jobs (which uses get_default_hermes_root). The tick lock
is storage coordination — it must live next to the single jobs.json so that
tickers running under different profiles share one lock and can't
double-fire the relocated store (#32091). Execution context (.env,
config.yaml, scripts) stays profile-aware via _get_hermes_home().
"""
from hermes_constants import get_default_hermes_root
lock_dir = (_hermes_home or get_default_hermes_root()) / "cron"
"""Resolve cron lock paths at call time so profile/env changes are honored."""
hermes_home = _get_hermes_home()
lock_dir = hermes_home / "cron"
return lock_dir, lock_dir / ".tick.lock"
@ -1857,32 +1849,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
os.environ["TERMINAL_CWD"] = _job_workdir
logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
# Scope this job's execution to its owning profile's HERMES_HOME (#32091).
# The shared root store holds every profile's jobs, but a job must run with
# the .env / config.yaml / credentials of the profile that created it — not
# whichever profile's ticker happened to pick it up. We set both the
# in-process ContextVar override (consumed by _get_hermes_home() for the
# config/.env/script loads below) AND os.environ["HERMES_HOME"] (inherited
# by any child subprocess the agent spawns). tick() routes profile-scoped
# jobs to the single-worker sequential pool, so mutating os.environ here is
# safe — they never overlap. Restored in the finally block.
from cron.jobs import resolve_profile_home
from hermes_constants import set_hermes_home_override
_job_profile = (job.get("profile") or "default").strip() or "default"
_profile_home = resolve_profile_home(_job_profile)
_prior_hermes_home = os.environ.get("HERMES_HOME", "_UNSET_")
_hermes_home_token = None
if _profile_home is not None and _profile_home != _get_hermes_home().resolve():
os.environ["HERMES_HOME"] = str(_profile_home)
_hermes_home_token = set_hermes_home_override(str(_profile_home))
logger.info("Job '%s': executing under profile %r (HERMES_HOME=%s)",
job_id, _job_profile, _profile_home)
elif _profile_home is None and _job_profile != "default":
logger.warning(
"Job '%s': profile %r no longer exists — running under the "
"ticker's profile instead", job_id, _job_profile,
)
try:
# Re-read .env and config.yaml fresh every run so provider/key
# changes take effect without a gateway restart.
@ -2294,19 +2260,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
os.environ.pop("TERMINAL_CWD", None)
else:
os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
# Restore HERMES_HOME to the ticker's value when this job overrode it
# for profile-scoped execution (#32091). Mirrors the TERMINAL_CWD
# restore above; the sequential pool guarantees no overlap.
if _hermes_home_token is not None:
try:
from hermes_constants import reset_hermes_home_override
reset_hermes_home_override(_hermes_home_token)
except Exception:
pass
if _prior_hermes_home == "_UNSET_":
os.environ.pop("HERMES_HOME", None)
else:
os.environ["HERMES_HOME"] = _prior_hermes_home
# Clean up ContextVar session/delivery state for this job.
clear_session_vars(_ctx_tokens)
for _var_name in _cron_delivery_vars:
@ -2512,26 +2465,12 @@ def tick(verbose: bool = True, adapters=None, loop=None, sync: bool = True) -> i
body."""
return run_one_job(job, adapters=adapters, loop=loop, verbose=verbose)
# Partition due jobs: those that mutate process-global os.environ
# inside run_job MUST run sequentially to avoid corrupting each other.
# Two cases mutate env:
# - a per-job workdir sets os.environ["TERMINAL_CWD"].
# - a per-job profile whose HERMES_HOME differs from the ticker's
# sets os.environ["HERMES_HOME"] to scope execution (#32091).
# Jobs that need neither leave env untouched and stay parallel-safe.
def _needs_sequential(j: dict) -> bool:
if (j.get("workdir") or "").strip():
return True
prof = (j.get("profile") or "default").strip() or "default"
try:
from cron.jobs import resolve_profile_home
phome = resolve_profile_home(prof)
except Exception:
phome = None
return phome is not None and phome != _get_hermes_home().resolve()
sequential_jobs = [j for j in due_jobs if _needs_sequential(j)]
parallel_jobs = [j for j in due_jobs if not _needs_sequential(j)]
# Partition due jobs: those with a per-job workdir mutate
# os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
# so they MUST run sequentially to avoid corrupting each other. Jobs
# without a workdir leave env untouched and stay parallel-safe.
sequential_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
_results: list = []
_all_futures: list = []

View file

@ -36,13 +36,13 @@ import uuid
from pathlib import Path
from typing import Any, Dict, List, Optional
from hermes_constants import get_default_hermes_root
from hermes_constants import get_hermes_home
from hermes_time import now as _hermes_now
from utils import atomic_replace
logger = logging.getLogger(__name__)
CRON_DIR = get_default_hermes_root().resolve() / "cron"
CRON_DIR = get_hermes_home().resolve() / "cron"
SUGGESTIONS_FILE = CRON_DIR / "suggestions.json"
# In-process lock protecting load->modify->save cycles (the background review