From a657397769ab69b3bc72afca38161e04ee36aff7 Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 18 Jun 2026 13:08:21 +1000
Subject: [PATCH 001/470] test(cron): characterize in-process + desktop ticker
contract before provider refactor
---
tests/cron/test_scheduler_provider.py | 83 +++++++++++++++++++++++++++
1 file changed, 83 insertions(+)
create mode 100644 tests/cron/test_scheduler_provider.py
diff --git a/tests/cron/test_scheduler_provider.py b/tests/cron/test_scheduler_provider.py
new file mode 100644
index 00000000000..1e94347dfa8
--- /dev/null
+++ b/tests/cron/test_scheduler_provider.py
@@ -0,0 +1,83 @@
+"""Characterization tests for the cron trigger before/after the provider refactor.
+
+These lock the CURRENT in-process-ticker contract (Phase 0 of the pluggable
+CronScheduler plan, .hermes/plans/cron-scheduler-provider-interface.md). They
+must pass unchanged on `main` now, and after every subsequent phase of the
+refactor — they are the regression harness that proves the built-in firing
+behavior is byte-for-byte preserved when the ticker is moved behind the
+CronScheduler provider interface.
+
+No production code is exercised beyond the two ticker entry points:
+ - gateway/run.py::_start_cron_ticker (production gateway ticker)
+ - hermes_cli/web_server.py::_start_desktop_cron_ticker (desktop fallback)
+
+Both call `cron.scheduler.tick(...)` on a loop and exit when their stop_event
+is set. We patch `cron.scheduler.tick` (both tickers import it locally as
+`cron_tick`, so the module-attribute patch is observed) and assert the loop
+drives it and stops promptly.
+"""
+import threading
+import time
+from unittest.mock import patch
+
+
+def test_ticker_calls_tick_at_least_once_then_stops():
+ """The gateway in-process ticker loop calls cron.scheduler.tick repeatedly
+ and exits promptly once the stop_event is set."""
+ from gateway.run import _start_cron_ticker
+
+ calls = []
+ stop = threading.Event()
+
+ def fake_tick(*args, **kwargs):
+ calls.append(kwargs)
+ return 0
+
+ with patch("cron.scheduler.tick", side_effect=fake_tick):
+ # interval=0 keeps the loop tight; stop after a brief beat.
+ t = threading.Thread(
+ target=_start_cron_ticker,
+ args=(stop,),
+ kwargs={"interval": 0},
+ daemon=True,
+ )
+ t.start()
+ time.sleep(0.2)
+ stop.set()
+ t.join(timeout=5)
+
+ assert not t.is_alive(), "ticker did not exit after stop_event was set"
+ assert len(calls) >= 1, "ticker never called tick()"
+ # Contract: the ticker invokes tick with sync=False (fire-and-forget from
+ # the background thread, never the synchronous CLI path).
+ assert calls[0].get("sync") is False
+
+
+def test_desktop_ticker_calls_tick_then_stops():
+ """The desktop dashboard ticker loop calls cron.scheduler.tick and exits
+ once the stop_event is set. Desktop has no live adapters, so it ticks with
+ no adapters/loop."""
+ from hermes_cli.web_server import _start_desktop_cron_ticker
+
+ calls = []
+ stop = threading.Event()
+
+ def fake_tick(*args, **kwargs):
+ calls.append(kwargs)
+ return 0
+
+ with patch("cron.scheduler.tick", side_effect=fake_tick):
+ t = threading.Thread(
+ target=_start_desktop_cron_ticker,
+ args=(stop,),
+ kwargs={"interval": 0},
+ daemon=True,
+ )
+ t.start()
+ time.sleep(0.2)
+ stop.set()
+ t.join(timeout=5)
+
+ assert not t.is_alive(), "desktop ticker did not exit after stop_event was set"
+ assert len(calls) >= 1, "desktop ticker never called tick()"
+ assert calls[0].get("sync") is False
From e6ff41ca9516cbca6470a56b1ab98939dbdb935a Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 18 Jun 2026 13:58:43 +1000
Subject: [PATCH 002/470] feat(cron): CronScheduler ABC +
InProcessCronScheduler (provider #1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Phase 1 of the pluggable cron-scheduler refactor (Axis B — the trigger).
No call-site changes; this phase only makes the abstraction exist + tested
in isolation.
Task 1.1: cron/scheduler_provider.py — the EXPERIMENTAL CronScheduler ABC.
Required surface is name + start; is_available()/stop() carry safe defaults.
is_available has a no-network invariant. Docstring marks it experimental
until the Chronos provider (Phase 4) validates the shape.
Task 1.2: InProcessCronScheduler wraps the historical 60s ticker loop, calling
cron.scheduler.tick(sync=False) exactly as the raw ticker does. Uses
stop_event.wait(interval) for responsive stop (both raw tickers already do).
Tests: ABC-is-abstract, default-is_available, the InProcess loop drives tick
and stops, stop() no-op, and test_abc_growth_stays_additive (the forward-compat
guard: required abstractmethods must stay exactly {name, start}, so the three
Phase-4 hooks land as NON-abstract additions).
tick() internals in cron/scheduler.py are byte-unchanged (only new file added).
Phase 0 characterization tests still green. Full tests/cron/: 445 passed.
---
cron/scheduler_provider.py | 98 +++++++++++++++++++++++++++
tests/cron/test_scheduler_provider.py | 78 +++++++++++++++++++++
2 files changed, 176 insertions(+)
create mode 100644 cron/scheduler_provider.py
diff --git a/cron/scheduler_provider.py b/cron/scheduler_provider.py
new file mode 100644
index 00000000000..329cf4ae8a6
--- /dev/null
+++ b/cron/scheduler_provider.py
@@ -0,0 +1,98 @@
+"""CronScheduler provider interface (Axis B — the trigger).
+
+⚠️ EXPERIMENTAL — this interface is validated by exactly ONE consumer (the
+built-in) until an external provider (Chronos, Phase 4) shakes it out. Until
+then the module path, method signatures, and start() kwargs MAY change without
+a deprecation cycle. Once a second provider validates the shape it becomes
+stable. Any growth MUST be additive (new optional method with a default), never
+a changed signature on start() or a new abstractmethod.
+
+A CronScheduler decides *when* a due job fires. It does NOT decide what firing
+means: execution + delivery stay in cron.scheduler.run_job / _deliver_result,
+shared by all providers. Providers must never reimplement agent construction or
+delivery.
+
+The built-in InProcessCronScheduler runs the historical 60s daemon-thread
+ticker. Alternative providers (e.g. Chronos, a NAS-mediated managed-cron
+provider for scale-to-zero deployments) live under plugins/cron/<name>/ and are
+selected via the `cron.provider` config key (empty = built-in).
+"""
+from __future__ import annotations
+
+import threading
+from abc import ABC, abstractmethod
+from typing import Any
+
+
+class CronScheduler(ABC):
+ """Axis-B trigger provider. Decides WHEN a due cron job fires.
+
+ Required surface is intentionally minimal: ``name`` + ``start``. ``stop``
+ and ``is_available`` carry safe defaults. The three Phase-4 hooks
+ (``on_jobs_changed`` / ``fire_due`` / ``reconcile``) are added later as
+ NON-abstract methods so the built-in keeps satisfying the ABC without
+ overriding them — see ``test_abc_growth_stays_additive``.
+ """
+
+ @property
+ @abstractmethod
+ def name(self) -> str:
+ """Short identifier, e.g. 'builtin', 'chronos'."""
+
+ def is_available(self) -> bool:
+ """Whether this provider can run in the current environment.
+
+ MUST NOT make network calls. The built-in is always available; an
+ external provider checks for configured endpoint/credentials. When a
+ named provider returns False, the resolver falls back to the built-in.
+ """
+ return True
+
+ @abstractmethod
+ def start(
+ self,
+ stop_event: threading.Event,
+ *,
+ adapters: Any = None,
+ loop: Any = None,
+ interval: int = 60,
+ ) -> None:
+ """Begin firing due jobs.
+
+ For the built-in this BLOCKS in the 60s loop until stop_event is set
+ (it is run inside a daemon thread by the caller, exactly as today).
+ An external provider may register a schedule/webhook and return
+ immediately; in that case it must still honor stop_event for teardown.
+ """
+
+ def stop(self) -> None:
+ """Optional eager teardown hook. Default no-op; setting the stop_event
+ is the primary stop signal. Override for providers holding external
+ resources (queue consumers, HTTP servers)."""
+ return None
+
+
+class InProcessCronScheduler(CronScheduler):
+ """Default provider: the historical in-process 60s ticker.
+
+ ``start()`` blocks in the tick loop until ``stop_event`` is set, identical
+ to the pre-refactor ``_start_cron_ticker`` core loop. The caller runs it in
+ a daemon thread.
+ """
+
+ @property
+ def name(self) -> str:
+ return "builtin"
+
+ def start(self, stop_event, *, adapters=None, loop=None, interval=60):
+ import logging
+ from cron.scheduler import tick as cron_tick
+
+ logger = logging.getLogger("cron.scheduler_provider")
+ logger.info("In-process cron scheduler started (interval=%ds)", interval)
+ while not stop_event.is_set():
+ try:
+ cron_tick(verbose=False, adapters=adapters, loop=loop, sync=False)
+ except Exception as e:
+ logger.debug("Cron tick error: %s", e)
+ stop_event.wait(interval)
diff --git a/tests/cron/test_scheduler_provider.py b/tests/cron/test_scheduler_provider.py
index 1e94347dfa8..74b3891122c 100644
--- a/tests/cron/test_scheduler_provider.py
+++ b/tests/cron/test_scheduler_provider.py
@@ -81,3 +81,81 @@ def test_desktop_ticker_calls_tick_then_stops():
assert not t.is_alive(), "desktop ticker did not exit after stop_event was set"
assert len(calls) >= 1, "desktop ticker never called tick()"
assert calls[0].get("sync") is False
+
+
+# ── Phase 1: CronScheduler ABC + InProcessCronScheduler ──────────────────────
+
+
+def test_cronscheduler_is_abstract():
+ """name + start are abstract — the bare ABC can't be instantiated."""
+ import pytest
+ from cron.scheduler_provider import CronScheduler
+
+ with pytest.raises(TypeError):
+ CronScheduler()
+
+
+def test_cronscheduler_default_is_available_true():
+ """is_available defaults to True (no-network) for a minimal subclass."""
+ from cron.scheduler_provider import CronScheduler
+
+ class Dummy(CronScheduler):
+ @property
+ def name(self):
+ return "dummy"
+
+ def start(self, stop_event, **kw):
+ pass
+
+ assert Dummy().is_available() is True
+
+
+def test_abc_growth_stays_additive():
+ """Forward-compat guard: the ABC's REQUIRED surface is exactly name+start.
+
+ Any optional hook added later for the external provider
+ (on_jobs_changed/fire_due/reconcile) must be NON-abstract (carry a default),
+ so the built-in keeps satisfying the ABC without overriding them. This test
+ fails loudly if someone makes a future hook abstract (a breaking change that
+ would force every provider — including the built-in — to implement it).
+ """
+ from cron.scheduler_provider import CronScheduler
+
+ abstract = set(getattr(CronScheduler, "__abstractmethods__", set()))
+ assert abstract == {"name", "start"}, (
+ f"CronScheduler abstractmethods changed to {abstract}; growth must be "
+ "additive (optional methods with defaults), not new abstract methods."
+ )
+
+
+def test_inprocess_provider_ticks_and_stops():
+ """The built-in provider drives cron.scheduler.tick(sync=False) on a loop
+ and exits promptly when stop_event is set — same contract as the raw
+ ticker characterized above."""
+ from cron.scheduler_provider import InProcessCronScheduler
+
+ calls = []
+ stop = threading.Event()
+ prov = InProcessCronScheduler()
+ assert prov.name == "builtin"
+
+ with patch("cron.scheduler.tick", side_effect=lambda *a, **k: calls.append(k) or 0):
+ t = threading.Thread(
+ target=prov.start, args=(stop,), kwargs={"interval": 0}, daemon=True
+ )
+ t.start()
+ time.sleep(0.2)
+ stop.set()
+ t.join(timeout=5)
+
+ assert not t.is_alive(), "provider did not exit after stop_event was set"
+ assert len(calls) >= 1, "provider never called tick()"
+ assert calls[0].get("sync") is False
+
+
+def test_inprocess_provider_stop_is_noop():
+ """The default stop() hook is a safe no-op (the stop_event is the real
+ stop signal for the built-in)."""
+ from cron.scheduler_provider import InProcessCronScheduler
+
+ assert InProcessCronScheduler().stop() is None
From ae8fa11097e181ee61a2f5feba0c77f1d3d1d69d Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 18 Jun 2026 14:09:36 +1000
Subject: [PATCH 003/470] feat(cron): cron.provider config + plugins/cron
discovery + resolver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Phase 2 of the pluggable cron-scheduler refactor. Still no call-site changes;
this wires up provider SELECTION with a hard safety net.
Task 2.1: cron.provider config key (hermes_cli/config.py), empty = built-in.
Additive key — deep-merge picks it up into existing configs with no version
bump (verified: load_config() yields the key on a pre-existing config.yaml).
Task 2.2: plugins/cron/__init__.py — discovery machinery cloned near-verbatim
from plugins/memory/__init__.py, retargeted at CronScheduler /
register_cron_scheduler. Bundled (plugins/cron/<name>/) + user
($HERMES_HOME/plugins/<name>/) dirs, bundled wins collisions. The built-in is
NOT discovered here — it's core, so the fallback can't be removed.
Task 2.3: resolve_cron_scheduler() in cron/scheduler_provider.py — reads
cron.provider and ALWAYS degrades to built-in (missing / unavailable / load
error / typo all fall back with a warning). cron can never be left without a
trigger.
Deviation from plan: the plan's resolver snippet used cfg_get("cron.provider")
(dotted-string form). The real cfg_get signature is cfg_get(cfg, *keys,
default=) — corrected to cfg_get(load_config(), "cron", "provider", default=""),
matching plugins/memory/__init__.py:349. Tests monkeypatch load_config (not
cfg_get) so the real traversal runs.
Tests: default key empty, discovery returns list, unknown load returns None,
and the five resolver paths (empty→builtin, no-section→builtin,
unknown→builtin, unavailable→builtin, available→used). Full tests/cron/: 453
passed; config suite green (additive key, no migration break).
---
cron/scheduler_provider.py | 40 +++
hermes_cli/config.py | 8 +
plugins/cron/__init__.py | 344 ++++++++++++++++++++++++++
tests/cron/test_scheduler_provider.py | 103 ++++++++
4 files changed, 495 insertions(+)
create mode 100644 plugins/cron/__init__.py
diff --git a/cron/scheduler_provider.py b/cron/scheduler_provider.py
index 329cf4ae8a6..45243e7749c 100644
--- a/cron/scheduler_provider.py
+++ b/cron/scheduler_provider.py
@@ -72,6 +72,46 @@ class CronScheduler(ABC):
return None
+def resolve_cron_scheduler() -> "CronScheduler":
+ """Return the active cron scheduler provider.
+
+ Reads ``cron.provider`` from config. Empty/absent → built-in. A named
+ provider that is missing, fails to load, or reports ``is_available() ==
+ False`` falls back to the built-in with a warning — cron must never be left
+ without a trigger.
+ """
+ import logging
+
+ logger = logging.getLogger("cron.scheduler_provider")
+
+ name = ""
+ try:
+ from hermes_cli.config import cfg_get, load_config
+ name = (cfg_get(load_config(), "cron", "provider", default="") or "").strip()
+ except Exception:
+ pass
+
+ if not name or name in ("builtin", "in-process", "inprocess"):
+ return InProcessCronScheduler()
+
+ try:
+ from plugins.cron import load_cron_scheduler
+ provider = load_cron_scheduler(name)
+ if provider is None:
+ logger.warning("cron.provider '%s' not found; using built-in ticker", name)
+ return InProcessCronScheduler()
+ if not provider.is_available():
+ logger.warning("cron.provider '%s' not available; using built-in ticker", name)
+ return InProcessCronScheduler()
+ logger.info("Using cron scheduler provider: %s", provider.name)
+ return provider
+ except Exception as e:
+ logger.warning(
+ "Failed to load cron.provider '%s' (%s); using built-in ticker", name, e
+ )
+ return InProcessCronScheduler()
+
+
class InProcessCronScheduler(CronScheduler):
"""Default provider: the historical in-process 60s ticker.
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 356839f9903..d53393ac432 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -2124,6 +2124,14 @@ DEFAULT_CONFIG = {
},
"cron": {
+ # Active cron SCHEDULER provider (Axis B — the trigger that decides
+ # WHEN a due job fires). Empty string = the built-in in-process 60s
+ # ticker (default). Name an installed provider (plugins/cron/<name>/ or
+ # $HERMES_HOME/plugins/<name>/) to relocate the trigger — e.g. "chronos",
+ # the NAS-mediated managed-cron provider for scale-to-zero deployments.
+ # An unknown or unavailable provider falls back to the built-in, so cron
+ # never loses its trigger.
+ "provider": "",
# Wrap delivered cron responses with a header (task name) and footer
# ("The agent cannot see this message"). Set to false for clean output.
"wrap_response": True,
diff --git a/plugins/cron/__init__.py b/plugins/cron/__init__.py
new file mode 100644
index 00000000000..fbf1ac2eb08
--- /dev/null
+++ b/plugins/cron/__init__.py
@@ -0,0 +1,344 @@
+"""Cron scheduler provider plugin discovery.
+
+Scans two directories for cron scheduler provider plugins:
+
+1. Bundled providers: ``plugins/cron/<name>/`` (shipped with hermes-agent)
+2. User-installed providers: ``$HERMES_HOME/plugins/<name>/``
+
+Each subdirectory must contain ``__init__.py`` with a class implementing the
+``CronScheduler`` ABC (``cron/scheduler_provider.py``). On name collisions,
+bundled providers take precedence.
+
+This is a near-verbatim clone of ``plugins/memory/__init__.py`` — the same
+discovery/loader machinery, retargeted at ``CronScheduler``. The built-in
+``InProcessCronScheduler`` is NOT discovered here: it is core (lives in
+``cron/scheduler_provider.py``) so the fallback can never be accidentally
+removed. Only NON-default providers (e.g. "chronos") live under this directory.
+
+Only ONE provider can be active at a time, selected via ``cron.provider`` in
+config.yaml (empty = built-in). See ``cron.scheduler_provider.resolve_cron_scheduler``.
+
+Usage:
+ from plugins.cron import discover_cron_schedulers, load_cron_scheduler
+
+ available = discover_cron_schedulers() # [(name, desc, available), ...]
+ provider = load_cron_scheduler("chronos") # CronScheduler instance
+"""
+
+from __future__ import annotations
+
+import importlib
+import importlib.machinery
+import importlib.util
+import logging
+import sys
+from pathlib import Path
+from typing import List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+_CRON_PLUGINS_DIR = Path(__file__).parent
+
+# Synthetic parent package for user-installed providers, so they don't
+# collide with bundled providers in sys.modules.
+_USER_NAMESPACE = "_hermes_user_cron"
+
+
+def _register_synthetic_package(name: str, search_locations: List[str]) -> None:
+ """Register an empty package shell in sys.modules.
+
+ User-installed providers import as ``_hermes_user_cron.<name>``, a dotted
+ name whose parents exist nowhere on disk. Unless those parents are present
+ in ``sys.modules``, any relative import inside the plugin
+ (``from . import config``) fails with
+ ``ModuleNotFoundError: No module named '_hermes_user_cron'`` — the same
+ reason the loader already registers ``plugins`` and ``plugins.cron`` for
+ bundled providers.
+ """
+ if name in sys.modules:
+ return
+ spec = importlib.machinery.ModuleSpec(name, None, is_package=True)
+ spec.submodule_search_locations = search_locations
+ sys.modules[name] = importlib.util.module_from_spec(spec)
+
+
+# ---------------------------------------------------------------------------
+# Directory helpers
+# ---------------------------------------------------------------------------
+
+def _get_user_plugins_dir() -> Optional[Path]:
+ """Return ``$HERMES_HOME/plugins/`` or None if unavailable."""
+ try:
+ from hermes_constants import get_hermes_home
+ d = get_hermes_home() / "plugins"
+ return d if d.is_dir() else None
+ except Exception:
+ return None
+
+
+def _is_cron_provider_dir(path: Path) -> bool:
+ """Heuristic: does *path* look like a cron scheduler provider plugin?
+
+ Checks for ``register_cron_scheduler`` or ``CronScheduler`` in the
+ ``__init__.py`` source. Cheap text scan — no import needed.
+ """
+ init_file = path / "__init__.py"
+ if not init_file.exists():
+ return False
+ try:
+ source = init_file.read_text(errors="replace")[:8192]
+ return "register_cron_scheduler" in source or "CronScheduler" in source
+ except Exception:
+ return False
+
+
+def _iter_provider_dirs() -> List[Tuple[str, Path]]:
+ """Return ``(name, path)`` pairs for all discovered provider directories.
+
+ Scans bundled first, then user-installed. Bundled takes precedence on
+ name collisions (first-seen wins via ``seen`` set).
+ """
+ seen: set = set()
+ dirs: List[Tuple[str, Path]] = []
+
+ # 1. Bundled providers (plugins/cron/<name>/)
+ if _CRON_PLUGINS_DIR.is_dir():
+ for child in sorted(_CRON_PLUGINS_DIR.iterdir()):
+ if not child.is_dir() or child.name.startswith(("_", ".")):
+ continue
+ if not (child / "__init__.py").exists():
+ continue
+ seen.add(child.name)
+ dirs.append((child.name, child))
+
+ # 2. User-installed providers ($HERMES_HOME/plugins/<name>/)
+ user_dir = _get_user_plugins_dir()
+ if user_dir:
+ for child in sorted(user_dir.iterdir()):
+ if not child.is_dir() or child.name.startswith(("_", ".")):
+ continue
+ if child.name in seen:
+ continue # bundled takes precedence
+ if not _is_cron_provider_dir(child):
+ continue # skip non-cron plugins
+ dirs.append((child.name, child))
+
+ return dirs
+
+
+def find_provider_dir(name: str) -> Optional[Path]:
+ """Resolve a provider name to its directory.
+
+ Checks bundled first, then user-installed.
+ """
+ # Bundled
+ bundled = _CRON_PLUGINS_DIR / name
+ if bundled.is_dir() and (bundled / "__init__.py").exists():
+ return bundled
+ # User-installed
+ user_dir = _get_user_plugins_dir()
+ if user_dir:
+ user = user_dir / name
+ if user.is_dir() and _is_cron_provider_dir(user):
+ return user
+ return None
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+def discover_cron_schedulers() -> List[Tuple[str, str, bool]]:
+ """Scan bundled and user-installed directories for available providers.
+
+ Returns list of (name, description, is_available) tuples. May be empty —
+ the built-in is core, not discovered here, so a fresh checkout with no
+ bundled non-default provider returns []. Bundled providers take precedence
+ on name collisions.
+ """
+ results = []
+
+ for name, child in _iter_provider_dirs():
+ # Read description from plugin.yaml if available
+ desc = ""
+ yaml_file = child / "plugin.yaml"
+ if yaml_file.exists():
+ try:
+ import yaml
+ with open(yaml_file, encoding="utf-8-sig") as f:
+ meta = yaml.safe_load(f) or {}
+ desc = meta.get("description", "")
+ except Exception:
+ pass
+
+ # Quick availability check — try loading and calling is_available()
+ available = True
+ try:
+ provider = _load_provider_from_dir(child)
+ if provider:
+ available = provider.is_available()
+ else:
+ available = False
+ except Exception:
+ available = False
+
+ results.append((name, desc, available))
+
+ return results
+
+
+def load_cron_scheduler(name: str) -> Optional["CronScheduler"]: # noqa: F821
+ """Load and return a CronScheduler instance by name.
+
+ Checks both bundled (``plugins/cron/<name>/``) and user-installed
+ (``$HERMES_HOME/plugins/<name>/``) directories. Bundled takes precedence
+ on name collisions.
+
+ Returns None if the provider is not found or fails to load.
+ """
+ provider_dir = find_provider_dir(name)
+ if not provider_dir:
+ logger.debug("Cron provider '%s' not found in bundled or user plugins", name)
+ return None
+
+ try:
+ provider = _load_provider_from_dir(provider_dir)
+ if provider:
+ return provider
+ logger.warning("Cron provider '%s' loaded but no provider instance found", name)
+ return None
+ except Exception as e:
+ logger.warning("Failed to load cron provider '%s': %s", name, e)
+ return None
+
+
+def _load_provider_from_dir(provider_dir: Path) -> Optional["CronScheduler"]: # noqa: F821
+ """Import a provider module and extract the CronScheduler instance.
+
+ The module must have either:
+ - A register(ctx) function (plugin-style) — we simulate a ctx
+ - A top-level class that extends CronScheduler — we instantiate it
+ """
+ name = provider_dir.name
+ # Use a separate namespace for user-installed plugins so they don't
+ # collide with bundled providers in sys.modules.
+ _is_bundled = _CRON_PLUGINS_DIR in provider_dir.parents or provider_dir.parent == _CRON_PLUGINS_DIR
+ module_name = f"plugins.cron.{name}" if _is_bundled else f"{_USER_NAMESPACE}.{name}"
+ init_file = provider_dir / "__init__.py"
+
+ if not init_file.exists():
+ return None
+
+ # Check if already loaded. A synthetic package shell has no __file__;
+ # only reuse modules that were actually loaded from disk.
+ cached = sys.modules.get(module_name)
+ if cached is not None and getattr(cached, "__file__", None):
+ mod = cached
+ else:
+ # Ensure the parent packages are registered (for relative imports)
+ for parent in ("plugins", "plugins.cron"):
+ if parent not in sys.modules:
+ parent_path = Path(__file__).parent
+ if parent == "plugins":
+ parent_path = parent_path.parent
+ parent_init = parent_path / "__init__.py"
+ if parent_init.exists():
+ spec = importlib.util.spec_from_file_location(
+ parent, str(parent_init),
+ submodule_search_locations=[str(parent_path)]
+ )
+ if spec:
+ parent_mod = importlib.util.module_from_spec(spec)
+ sys.modules[parent] = parent_mod
+ try:
+ spec.loader.exec_module(parent_mod)
+ except Exception:
+ pass
+
+ # User-installed plugins need their synthetic parent registered the
+ # same way, or relative imports inside the plugin cannot resolve.
+ if not _is_bundled:
+ _register_synthetic_package(_USER_NAMESPACE, [])
+
+ # Now load the provider module
+ spec = importlib.util.spec_from_file_location(
+ module_name, str(init_file),
+ submodule_search_locations=[str(provider_dir)]
+ )
+ if not spec:
+ return None
+
+ mod = importlib.util.module_from_spec(spec)
+ sys.modules[module_name] = mod
+
+ # Register submodules so relative imports work
+ # e.g., "from ._nas_client import NasCronClient" in the chronos plugin
+ for sub_file in provider_dir.glob("*.py"):
+ if sub_file.name == "__init__.py":
+ continue
+ sub_name = sub_file.stem
+ full_sub_name = f"{module_name}.{sub_name}"
+ if full_sub_name not in sys.modules:
+ sub_spec = importlib.util.spec_from_file_location(
+ full_sub_name, str(sub_file)
+ )
+ if sub_spec:
+ sub_mod = importlib.util.module_from_spec(sub_spec)
+ sys.modules[full_sub_name] = sub_mod
+ try:
+ sub_spec.loader.exec_module(sub_mod)
+ except Exception as e:
+ logger.debug("Failed to load submodule %s: %s", full_sub_name, e)
+
+ try:
+ spec.loader.exec_module(mod)
+ except Exception as e:
+ logger.debug("Failed to exec_module %s: %s", module_name, e)
+ sys.modules.pop(module_name, None)
+ return None
+
+ # Try register(ctx) pattern first (how our plugins are written)
+ if hasattr(mod, "register"):
+ collector = _ProviderCollector()
+ try:
+ mod.register(collector)
+ if collector.provider:
+ return collector.provider
+ except Exception as e:
+ logger.debug("register() failed for %s: %s", name, e)
+
+ # Fallback: find a CronScheduler subclass and instantiate it
+ from cron.scheduler_provider import CronScheduler
+ for attr_name in dir(mod):
+ attr = getattr(mod, attr_name, None)
+ if (isinstance(attr, type) and issubclass(attr, CronScheduler)
+ and attr is not CronScheduler):
+ try:
+ return attr()
+ except Exception:
+ pass
+
+ return None
+
+
+class _ProviderCollector:
+ """Fake plugin context that captures register_cron_scheduler calls."""
+
+ def __init__(self):
+ self.provider = None
+
+ def register_cron_scheduler(self, provider):
+ self.provider = provider
+
+ # No-op for other registration methods
+ def register_tool(self, *args, **kwargs):
+ pass
+
+ def register_hook(self, *args, **kwargs):
+ pass
+
+ def register_memory_provider(self, *args, **kwargs):
+ pass
+
+ def register_cli_command(self, *args, **kwargs):
+ pass
diff --git a/tests/cron/test_scheduler_provider.py b/tests/cron/test_scheduler_provider.py
index 74b3891122c..8fdbb305a0f 100644
--- a/tests/cron/test_scheduler_provider.py
+++ b/tests/cron/test_scheduler_provider.py
@@ -159,3 +159,106 @@ def test_inprocess_provider_stop_is_noop():
from cron.scheduler_provider import InProcessCronScheduler
assert InProcessCronScheduler().stop() is None
+
+
+# ── Phase 2: config key, discovery, resolver ─────────────────────────────────
+
+
+def test_default_config_cron_provider_is_empty():
+ """The new cron.provider key defaults to empty (= built-in)."""
+ from hermes_cli.config import DEFAULT_CONFIG
+
+ assert DEFAULT_CONFIG["cron"]["provider"] == ""
+
+
+def test_discover_cron_schedulers_returns_list():
+ """Discovery returns a list. May be empty — the built-in is core, not
+ discovered, and no bundled non-default provider ships yet."""
+ from plugins.cron import discover_cron_schedulers
+
+ result = discover_cron_schedulers()
+ assert isinstance(result, list)
+
+
+def test_load_unknown_cron_scheduler_returns_none():
+ from plugins.cron import load_cron_scheduler
+
+ assert load_cron_scheduler("does-not-exist-xyz") is None
+
+
+def test_resolve_defaults_to_builtin(monkeypatch):
+ """Empty cron.provider → built-in."""
+ import hermes_cli.config as cfg
+ from cron import scheduler_provider as sp
+
+ monkeypatch.setattr(cfg, "load_config", lambda: {"cron": {"provider": ""}})
+ prov = sp.resolve_cron_scheduler()
+ assert prov.name == "builtin"
+
+
+def test_resolve_no_cron_section_falls_back_to_builtin(monkeypatch):
+ """Config with no cron section at all → built-in (cfg_get returns default)."""
+ import hermes_cli.config as cfg
+ from cron import scheduler_provider as sp
+
+ monkeypatch.setattr(cfg, "load_config", lambda: {})
+ prov = sp.resolve_cron_scheduler()
+ assert prov.name == "builtin"
+
+
+def test_resolve_unknown_provider_falls_back_to_builtin(monkeypatch):
+ """A named provider that doesn't exist → built-in (cron never dies)."""
+ import hermes_cli.config as cfg
+ from cron import scheduler_provider as sp
+
+ monkeypatch.setattr(cfg, "load_config", lambda: {"cron": {"provider": "nope-not-real"}})
+ prov = sp.resolve_cron_scheduler()
+ assert prov.name == "builtin"
+
+
+def test_resolve_unavailable_provider_falls_back(monkeypatch):
+ """A provider that loads but reports is_available()==False → built-in."""
+ import hermes_cli.config as cfg
+ import plugins.cron as pc
+ from cron import scheduler_provider as sp
+ from cron.scheduler_provider import CronScheduler
+
+ class Unavailable(CronScheduler):
+ @property
+ def name(self):
+ return "unavailable"
+
+ def is_available(self):
+ return False
+
+ def start(self, stop_event, **kw):
+ pass
+
+ monkeypatch.setattr(cfg, "load_config", lambda: {"cron": {"provider": "unavailable"}})
+ monkeypatch.setattr(pc, "load_cron_scheduler", lambda n: Unavailable())
+ prov = sp.resolve_cron_scheduler()
+ assert prov.name == "builtin"
+
+
+def test_resolve_available_provider_is_used(monkeypatch):
+ """A provider that loads and is available is returned (not the fallback)."""
+ import hermes_cli.config as cfg
+ import plugins.cron as pc
+ from cron import scheduler_provider as sp
+ from cron.scheduler_provider import CronScheduler
+
+ class Fake(CronScheduler):
+ @property
+ def name(self):
+ return "fake"
+
+ def is_available(self):
+ return True
+
+ def start(self, stop_event, **kw):
+ pass
+
+ monkeypatch.setattr(cfg, "load_config", lambda: {"cron": {"provider": "fake"}})
+ monkeypatch.setattr(pc, "load_cron_scheduler", lambda n: Fake())
+ prov = sp.resolve_cron_scheduler()
+ assert prov.name == "fake"
From abbd8646eb511833500377799f5853d8d4eda5a2 Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 18 Jun 2026 14:14:53 +1000
Subject: [PATCH 004/470] feat(gateway,desktop): start cron via resolved
CronScheduler provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Phase 3 — rebind both ticker call sites to resolve_cron_scheduler(). Default
(built-in) path is byte-identical; Phase 0 characterization tests + the full
gateway suite (6919) stay green.
Task 3.1: split gateway/run.py _start_cron_ticker into:
- _start_gateway_housekeeping() — the gateway-only chores (channel-dir
refresh, image/doc cache cleanup, paste sweep, curator poll), now on their
own loop/thread, independent of which cron provider is active.
- _start_cron_ticker() — kept as a DEPRECATED shim that runs only the
built-in InProcessCronScheduler().start(), preserving the symbol for
hermes_cli/debug.py and the Phase 0 characterization test.
Task 3.2: start_gateway() resolves the provider and runs provider.start() in
the 'cron-scheduler' thread, plus a second 'gateway-housekeeping' thread;
teardown sets the shared cron_stop, calls provider.stop(), joins both.
Task 3.3: desktop _start_desktop_cron_ticker() swapped its inline tick loop for
resolve_cron_scheduler().start() (no adapters/loop — desktop has none).
The provider owns ONLY the cron tick (so an external scale-to-zero provider
with no 60s loop fits); gateway housekeeping is decoupled from the cron
trigger. Both threads share cron_stop.
Verified: full tests/cron/ (453) + full tests/gateway/ (6919) green. Manual
gateway smoke (Task 3.4) is operator-run, pending.
---
gateway/run.py | 87 +++++++++++++++++++++++++++-------------
hermes_cli/web_server.py | 25 +++++-------
2 files changed, 70 insertions(+), 42 deletions(-)
diff --git a/gateway/run.py b/gateway/run.py
index 4b41cfc6aec..2f5900e92f5 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -16454,21 +16454,20 @@ def _run_planned_stop_watcher(
stop_event.wait(poll_interval)
-def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, interval: int = 60):
- """
- Background thread that ticks the cron scheduler at a regular interval.
-
- Runs inside the gateway process so cronjobs fire automatically without
- needing a separate `hermes cron daemon` or system cron entry.
+def _start_gateway_housekeeping(stop_event: threading.Event, adapters=None, loop=None, interval: int = 60):
+ """Background thread for gateway-only periodic chores (NOT cron).
- When ``adapters`` and ``loop`` are provided, passes them through to the
- cron delivery path so live adapters can be used for E2EE rooms.
+ Split out of the historical ``_start_cron_ticker`` so the cron *trigger*
+ can live behind the ``CronScheduler`` provider (built-in or external) while
+ these gateway-specific chores keep running independently of which provider
+ fires cron. An external scale-to-zero provider has no 60s loop at all, but
+ this housekeeping still wants its hourly cadence — so it owns its own loop.
- Also refreshes the channel directory every 5 minutes and prunes the
- image/audio/document cache + expired ``hermes debug share`` pastes
- once per hour.
+ Refreshes the channel directory every 5 minutes and prunes the
+ image/audio/document cache + expired ``hermes debug share`` pastes once per
+ hour, and polls the curator hourly (its inner gate enforces the real
+ weekly cadence).
"""
- from cron.scheduler import tick as cron_tick
from gateway.platforms.base import cleanup_image_cache, cleanup_document_cache
from hermes_cli.debug import _sweep_expired_pastes
@@ -16477,14 +16476,9 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
PASTE_SWEEP_EVERY = 60 # ticks — once per hour
CURATOR_EVERY = 60 # ticks — poll hourly (inner gate handles the real cadence)
- logger.info("Cron ticker started (interval=%ds)", interval)
+ logger.info("Gateway housekeeping started (interval=%ds)", interval)
tick_count = 0
while not stop_event.is_set():
- try:
- cron_tick(verbose=False, adapters=adapters, loop=loop, sync=False)
- except Exception as e:
- logger.debug("Cron tick error: %s", e)
-
tick_count += 1
if tick_count % CHANNEL_DIR_EVERY == 0 and adapters:
@@ -16492,9 +16486,9 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
from gateway.channel_directory import build_channel_directory
if loop is not None:
# build_channel_directory is async (Slack web calls), and
- # this ticker runs in a background thread. Schedule onto
- # the gateway event loop and wait briefly for completion
- # so refresh failures are still logged via the except.
+ # this runs in a background thread. Schedule onto the
+ # gateway event loop and wait briefly for completion so
+ # refresh failures are still logged via the except.
fut = safe_schedule_threadsafe(
build_channel_directory(adapters), loop,
logger=logger,
@@ -16530,7 +16524,7 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
except Exception as e:
logger.debug("Paste sweep error: %s", e)
- # Curator — piggy-back on the existing cron ticker so long-running
+ # Curator — piggy-back on the housekeeping loop so long-running
# gateways get weekly skill maintenance without needing restarts.
# maybe_run_curator() is internally gated by config.interval_hours
# (7 days by default), so CURATOR_EVERY is just the poll rate — the
@@ -16546,7 +16540,22 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
logger.debug("Curator tick error: %s", e)
stop_event.wait(timeout=interval)
- logger.info("Cron ticker stopped")
+ logger.info("Gateway housekeeping stopped")
+
+
+def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, interval: int = 60):
+ """DEPRECATED shim — preserved for backward compatibility.
+
+ The cron trigger now lives behind the ``CronScheduler`` provider
+ (``cron.scheduler_provider``); the gateway resolves a provider and runs its
+ ``start()`` directly (see ``start_gateway``). This shim runs ONLY the
+ built-in in-process tick loop, exactly as before, for any external caller
+ or test that still references this symbol (e.g. hermes_cli/debug.py). It no
+ longer runs gateway housekeeping — that moved to
+ ``_start_gateway_housekeeping``.
+ """
+ from cron.scheduler_provider import InProcessCronScheduler
+ InProcessCronScheduler().start(stop_event, adapters=adapters, loop=loop, interval=interval)
async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False, verbosity: Optional[int] = 0) -> bool:
@@ -16942,17 +16951,34 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
return True
- # Start background cron ticker so scheduled jobs fire automatically.
- # Pass the event loop so cron delivery can use live adapters (E2EE support).
+ # Start the background cron scheduler via the resolved provider so
+ # scheduled jobs fire automatically. The built-in provider is the
+ # historical in-process 60s ticker; an external provider (e.g. chronos)
+ # may arm a schedule and return. Pass the event loop so cron delivery can
+ # use live adapters (E2EE support).
+ from cron.scheduler_provider import resolve_cron_scheduler
cron_stop = threading.Event()
+ cron_provider = resolve_cron_scheduler()
cron_thread = threading.Thread(
- target=_start_cron_ticker,
+ target=cron_provider.start,
args=(cron_stop,),
kwargs={"adapters": runner.adapters, "loop": asyncio.get_running_loop()},
daemon=True,
- name="cron-ticker",
+ name="cron-scheduler",
)
cron_thread.start()
+
+ # Gateway-only periodic housekeeping (channel dir, cache cleanup, paste
+ # sweep, curator) — runs independently of which cron provider is active.
+ # Shares cron_stop as the shutdown signal.
+ housekeeping_thread = threading.Thread(
+ target=_start_gateway_housekeeping,
+ args=(cron_stop,),
+ kwargs={"adapters": runner.adapters, "loop": asyncio.get_running_loop()},
+ daemon=True,
+ name="gateway-housekeeping",
+ )
+ housekeeping_thread.start()
# Wait for shutdown
await runner.wait_for_shutdown()
@@ -16962,9 +16988,14 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
logger.error("Gateway exiting with failure: %s", runner.exit_reason)
return False
- # Stop cron ticker cleanly
+ # Stop cron scheduler + housekeeping cleanly
cron_stop.set()
+ try:
+ cron_provider.stop()
+ except Exception as e:
+ logger.debug("Cron provider stop() error: %s", e)
cron_thread.join(timeout=5)
+ housekeeping_thread.join(timeout=5)
# Stop the planned-stop watcher (daemon=True so this is belt-and-suspenders).
_planned_stop_watcher_stop.set()
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 70f39162cf8..768084eba36 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -113,23 +113,20 @@ def _start_desktop_cron_ticker(stop_event: "threading.Event", interval: int = 60
The scheduler tick loop normally lives in ``hermes gateway run`` — but the
desktop app spawns a ``hermes dashboard`` backend, not a gateway, so a cron
- a user creates in the app would never fire. We run a minimal ticker here
- (no live adapters; delivery falls back to the per-platform send path).
+ a user creates in the app would never fire. We run the resolved cron
+ scheduler provider here (no live adapters; delivery falls back to the
+ per-platform send path).
- Cross-process safe: ``cron.scheduler.tick`` takes the ``cron/.tick.lock``
- file lock, so this never double-fires alongside a real gateway on the same
- HERMES_HOME — whichever process grabs the lock first wins the tick.
+ Cross-process safe: the built-in provider's ``cron.scheduler.tick`` takes
+ the ``cron/.tick.lock`` file lock, so this never double-fires alongside a
+ real gateway on the same HERMES_HOME — whichever process grabs the lock
+ first wins the tick.
"""
- from cron.scheduler import tick as cron_tick
+ from cron.scheduler_provider import resolve_cron_scheduler
- _log.info("Desktop cron ticker started (interval=%ds)", interval)
- # Tick once up front (catches jobs due at launch), then on the interval.
- while not stop_event.is_set():
- try:
- cron_tick(verbose=False, sync=False)
- except Exception as e:
- _log.debug("Desktop cron tick error: %s", e)
- stop_event.wait(interval)
+ provider = resolve_cron_scheduler()
+ _log.info("Desktop cron scheduler started (provider=%s, interval=%ds)", provider.name, interval)
+ provider.start(stop_event, interval=interval)
@asynccontextmanager
From bfb6e0bb33e61cef064ab5b41f91716bc02a474b Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 18 Jun 2026 14:18:31 +1000
Subject: [PATCH 005/470] docs(cron): document CronScheduler provider +
cron.provider key
Phase 3.5. cron-internals.md gateway-integration section now describes the
pluggable trigger (resolve_cron_scheduler, built-in default, plugins/cron
discovery, the never-without-a-trigger fallback, and the trigger-vs-execution
split). cli-commands.md notes cron.provider near the hermes cron entry.
---
.../docs/developer-guide/cron-internals.md | 25 ++++++++++++++++++-
website/docs/reference/cli-commands.md | 7 ++++++
2 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/website/docs/developer-guide/cron-internals.md b/website/docs/developer-guide/cron-internals.md
index bad59645dbc..c895d339b09 100644
--- a/website/docs/developer-guide/cron-internals.md
+++ b/website/docs/developer-guide/cron-internals.md
@@ -102,7 +102,30 @@ tick()
### Gateway Integration
-In gateway mode, the scheduler runs in a dedicated background thread (`_start_cron_ticker` in `gateway/run.py`) that calls `scheduler.tick()` every 60 seconds alongside message handling.
+In gateway mode, the cron **trigger** (the part that decides *when* a due job
+fires — "Axis B") is selected through a pluggable `CronScheduler` provider. The
+gateway calls `resolve_cron_scheduler()` (`cron/scheduler_provider.py`) and runs
+the resolved provider's `start()` in a dedicated background thread, alongside a
+separate gateway-housekeeping thread.
+
+The active provider is chosen by the `cron.provider` config key:
+
+- **empty (default)** → the built-in `InProcessCronScheduler`, which runs the
+ historical in-process loop calling `scheduler.tick()` every 60 seconds. This
+ is byte-identical to the pre-provider behavior.
+- **a named provider** (e.g. `chronos`, a managed-cron provider for
+ scale-to-zero deployments) → discovered from `plugins/cron/<name>/` or
+ `$HERMES_HOME/plugins/<name>/`.
+
+If a named provider is missing, fails to load, or reports `is_available() ==
+False`, the resolver falls back to the built-in with a warning — **cron is
+never left without a trigger.** The built-in provider lives in core
+(`cron/scheduler_provider.py`), not in `plugins/`, so the fallback can't be
+accidentally removed.
+
+What "firing" *means* (job execution + delivery) is unchanged and shared by all
+providers — it stays in `scheduler.run_job()` / `scheduler._deliver_result()`.
+A provider only controls the trigger, never execution.
In CLI mode, cron jobs only fire when `hermes cron` commands are run or during active CLI sessions.
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 3071ac0e5fc..f0fe67d4349 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -533,6 +533,13 @@ hermes cron
| `status` | Check whether the cron scheduler is running. |
| `tick` | Run due jobs once and exit. |
+The cron **trigger** is pluggable via the `cron.provider` config key. Empty
+(the default) uses the built-in in-process ticker. A named provider (e.g.
+`chronos`, a managed-cron provider for scale-to-zero deployments) is discovered
+from `plugins/cron/<name>/` or `$HERMES_HOME/plugins/<name>/`; an unknown or
+unavailable provider falls back to the built-in, so cron is never left without
+a trigger. See the [cron internals](../developer-guide/cron-internals.md#gateway-integration) doc.
+
## `hermes kanban`
```bash
From 58b19a4f6988f2fda2cddb5c620628afce750a36 Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 18 Jun 2026 14:26:29 +1000
Subject: [PATCH 006/470] refactor(cron): extract run_one_job shared firing
helper from tick
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Phase 4A. Factor tick's per-job closure (_process_job: execute → save →
deliver → mark) into a module-level run_one_job(job, *, adapters, loop,
verbose) so the external Chronos provider's fire_due (Phase 4D) reuses the
IDENTICAL body — no duplicated correctness. tick's _process_job is now a thin
wrapper calling run_one_job; the pool/in-flight-guard/contextvars dispatch
logic is unchanged.
run_one_job fires ONE given job; it does NOT decide due-ness, claim, or compute
next_run (tick advances next_run_at under the file lock; an external provider
claims via the store CAS in Phase 4C). Pure refactor, no behavior change.
TDD: test_run_one_job.py characterizes the sequence through tick() first
(test_tick_process_job_sequence, passed pre-extraction), then unit-tests the
helper directly: success sequence, [SILENT]→skip delivery, empty-response soft
failure (#8585), failed-job-still-delivers, exception→mark-failed.
Verified: tests/cron/ 459 passed (was 453 + 6 new); tick behavior unchanged.
---
cron/scheduler.py | 105 +++++++++++++++++------------
tests/cron/test_run_one_job.py | 119 +++++++++++++++++++++++++++++++++
2 files changed, 182 insertions(+), 42 deletions(-)
create mode 100644 tests/cron/test_run_one_job.py
diff --git a/cron/scheduler.py b/cron/scheduler.py
index 35906996619..9bab59456ea 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -1967,6 +1967,64 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
logger.debug("Job '%s': failed to reap stale auxiliary clients: %s", job_id, e)
+def run_one_job(job: dict, *, adapters=None, loop=None, verbose: bool = False) -> bool:
+ """Run ONE due job end-to-end: execute → save output → deliver → mark.
+
+ This is the shared firing body extracted from ``tick``'s per-job closure so
+ that BOTH the built-in ticker and an external provider's ``fire_due`` (e.g.
+ Chronos) run the identical sequence — no duplicated correctness.
+
+ It does NOT decide whether the job is due, claim it, or compute the next
+ run — those are the caller's concern (``tick`` advances ``next_run_at``
+ under the file lock before dispatch; an external provider claims via the
+ store CAS). This function only fires the given job once.
+
+ Returns True if the job was processed (even if the job itself failed —
+ failure is recorded via ``mark_job_run``), False only if processing raised.
+ """
+ try:
+ success, output, final_response, error = run_job(job)
+
+ output_file = save_job_output(job["id"], output)
+ if verbose:
+ logger.info("Output saved to: %s", output_file)
+
+ # Deliver the final response to the origin/target chat.
+ # If the agent responded with [SILENT], skip delivery (but
+ # output is already saved above). Failed jobs always deliver.
+ deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}"
+ # Treat whitespace-only final responses the same as empty
+ # responses: do not deliver a blank message, and let the
+ # empty-response guard below mark the run as a soft failure.
+ should_deliver = bool(deliver_content.strip())
+ if should_deliver and success and SILENT_MARKER in deliver_content.strip().upper():
+ logger.info("Job '%s': agent returned %s — skipping delivery", job["id"], SILENT_MARKER)
+ should_deliver = False
+
+ delivery_error = None
+ if should_deliver:
+ try:
+ delivery_error = _deliver_result(job, deliver_content, adapters=adapters, loop=loop)
+ except Exception as de:
+ delivery_error = str(de)
+ logger.error("Delivery failed for job %s: %s", job["id"], de)
+
+ # Treat empty final_response as a soft failure so last_status
+ # is not "ok" — the agent ran but produced nothing useful.
+ # (issue #8585)
+ if success and not final_response.strip():
+ success = False
+ error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
+
+ mark_job_run(job["id"], success, error, delivery_error=delivery_error)
+ return True
+
+ except Exception as e:
+ logger.error("Error processing job %s: %s", job['id'], e)
+ mark_job_run(job["id"], False, str(e))
+ return False
+
+
def tick(verbose: bool = True, adapters=None, loop=None, sync: bool = True) -> int:
"""
Check and run all due jobs.
@@ -2045,48 +2103,11 @@ def tick(verbose: bool = True, adapters=None, loop=None, sync: bool = True) -> i
)
def _process_job(job: dict) -> bool:
- """Run one due job end-to-end: execute, save, deliver, mark."""
- try:
- success, output, final_response, error = run_job(job)
-
- output_file = save_job_output(job["id"], output)
- if verbose:
- logger.info("Output saved to: %s", output_file)
-
- # Deliver the final response to the origin/target chat.
- # If the agent responded with [SILENT], skip delivery (but
- # output is already saved above). Failed jobs always deliver.
- deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}"
- # Treat whitespace-only final responses the same as empty
- # responses: do not deliver a blank message, and let the
- # empty-response guard below mark the run as a soft failure.
- should_deliver = bool(deliver_content.strip())
- if should_deliver and success and SILENT_MARKER in deliver_content.strip().upper():
- logger.info("Job '%s': agent returned %s — skipping delivery", job["id"], SILENT_MARKER)
- should_deliver = False
-
- delivery_error = None
- if should_deliver:
- try:
- delivery_error = _deliver_result(job, deliver_content, adapters=adapters, loop=loop)
- except Exception as de:
- delivery_error = str(de)
- logger.error("Delivery failed for job %s: %s", job["id"], de)
-
- # Treat empty final_response as a soft failure so last_status
- # is not "ok" — the agent ran but produced nothing useful.
- # (issue #8585)
- if success and not final_response.strip():
- success = False
- error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
-
- mark_job_run(job["id"], success, error, delivery_error=delivery_error)
- return True
-
- except Exception as e:
- logger.error("Error processing job %s: %s", job['id'], e)
- mark_job_run(job["id"], False, str(e))
- return False
+ """Run one due job end-to-end. Thin wrapper around the shared
+ module-level ``run_one_job`` so ``tick`` and external providers
+ (Chronos ``fire_due``) use the identical execute→save→deliver→mark
+ body."""
+ return run_one_job(job, adapters=adapters, loop=loop, verbose=verbose)
# Partition due jobs: those with a per-job workdir mutate
# os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
diff --git a/tests/cron/test_run_one_job.py b/tests/cron/test_run_one_job.py
new file mode 100644
index 00000000000..7da6b1c14f4
--- /dev/null
+++ b/tests/cron/test_run_one_job.py
@@ -0,0 +1,119 @@
+"""Characterization + unit tests for the `run_one_job` shared helper (Phase 4A).
+
+`tick`'s per-job body (`_process_job`) is the execute → save → deliver → mark
+sequence that fires ONE due job. Phase 4A extracts it into a module-level
+`run_one_job(job, *, adapters=None, loop=None, verbose=False)` so the external
+Chronos provider's `fire_due` can reuse the IDENTICAL body — no duplicated
+correctness.
+
+The first test characterizes the sequence as driven through `tick()` (proving
+the extraction didn't change `tick`'s behavior); the rest unit-test the
+extracted helper directly.
+"""
+import cron.scheduler as s
+
+
+def _patch_pipeline(monkeypatch, *, success=True, output="out", final="final response",
+ error=None, silent_marker_in=None):
+ """Patch the job pipeline primitives and record the call order."""
+ calls = []
+
+ def fake_run_job(job):
+ calls.append(("run_job", job["id"]))
+ fr = final if silent_marker_in is None else silent_marker_in
+ return (success, output, fr, error)
+
+ def fake_save(jid, out):
+ calls.append(("save", jid))
+ return f"/tmp/{jid}.txt"
+
+ def fake_deliver(job, content, adapters=None, loop=None):
+ calls.append(("deliver", job["id"]))
+ return None
+
+ def fake_mark(jid, ok, err=None, delivery_error=None):
+ calls.append(("mark", jid, ok))
+
+ monkeypatch.setattr(s, "run_job", fake_run_job)
+ monkeypatch.setattr(s, "save_job_output", fake_save)
+ monkeypatch.setattr(s, "_deliver_result", fake_deliver)
+ monkeypatch.setattr(s, "mark_job_run", fake_mark)
+ return calls
+
+
+def test_tick_process_job_sequence(monkeypatch):
+ """Characterization: a single due job driven through tick() runs the
+ sequence run_job → save → deliver → mark, in that order."""
+ calls = _patch_pipeline(monkeypatch)
+ monkeypatch.setattr(s, "get_due_jobs", lambda: [{"id": "j1", "name": "t"}])
+ monkeypatch.setattr(s, "advance_next_run", lambda jid: True)
+
+ s.tick(verbose=False, sync=True)
+
+ assert [c[0] for c in calls] == ["run_job", "save", "deliver", "mark"]
+ assert calls[-1] == ("mark", "j1", True)
+
+
+def test_run_one_job_success_sequence(monkeypatch):
+ """The extracted helper runs the same execute→save→deliver→mark sequence
+ for a successful job."""
+ calls = _patch_pipeline(monkeypatch)
+
+ ok = s.run_one_job({"id": "j2", "name": "t"})
+
+ assert ok is True
+ assert [c[0] for c in calls] == ["run_job", "save", "deliver", "mark"]
+ assert calls[-1] == ("mark", "j2", True)
+
+
+def test_run_one_job_silent_skips_delivery(monkeypatch):
+ """A [SILENT] final response saves output + marks the run but does NOT
+ deliver."""
+ calls = _patch_pipeline(monkeypatch, silent_marker_in="[SILENT]")
+
+ s.run_one_job({"id": "j3", "name": "t"})
+
+ kinds = [c[0] for c in calls]
+ assert "run_job" in kinds and "save" in kinds and "mark" in kinds
+ assert "deliver" not in kinds
+
+
+def test_run_one_job_empty_response_is_soft_failure(monkeypatch):
+ """An empty final response marks the run as NOT ok (issue #8585)."""
+ calls = _patch_pipeline(monkeypatch, final=" ")
+
+ s.run_one_job({"id": "j4", "name": "t"})
+
+ mark = [c for c in calls if c[0] == "mark"][0]
+ assert mark == ("mark", "j4", False)
+
+
+def test_run_one_job_failed_job_delivers_error(monkeypatch):
+ """A failed job still delivers (the error notice) and marks not-ok."""
+ calls = _patch_pipeline(monkeypatch, success=False, final="", error="boom")
+
+ s.run_one_job({"id": "j5", "name": "t"})
+
+ kinds = [c[0] for c in calls]
+ assert "deliver" in kinds # failures always deliver
+ mark = [c for c in calls if c[0] == "mark"][0]
+ assert mark == ("mark", "j5", False)
+
+
+def test_run_one_job_exception_marks_failure(monkeypatch):
+ """If run_job raises, the helper marks the run failed and returns False
+ rather than propagating."""
+ def boom(job):
+ raise RuntimeError("kaboom")
+
+ monkeypatch.setattr(s, "run_job", boom)
+ marks = []
+ monkeypatch.setattr(
+ s, "mark_job_run",
+ lambda jid, ok, err=None, delivery_error=None: marks.append((jid, ok)),
+ )
+
+ ok = s.run_one_job({"id": "j6", "name": "t"})
+
+ assert ok is False
+ assert marks == [("j6", False)]
From 6ff5fd373b6695b1ed7b7e0f63fde6a8430d16e6 Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 18 Jun 2026 14:30:31 +1000
Subject: [PATCH 007/470] feat(cron): additive CronScheduler hooks
(on_jobs_changed/fire_due/reconcile)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Phase 4B. Three NON-abstract hooks on the CronScheduler ABC, all with
built-in-safe defaults so the built-in inherits them without overriding and
test_abc_growth_stays_additive stays green (required surface still {name,
start}):
- on_jobs_changed(): post-mutation reconcile hook. Built-in no-op.
- fire_due(job_id): claim the job via the store CAS (claim_job_for_fire,
Phase 4C) then run it through the shared run_one_job (Phase 4A). Returns
False if the claim is lost or the job vanished (repeat-N exhausted between
arm and fire). The inbound webhook (Phase 4E) routes here.
- reconcile(): converge the external registry toward jobs.json. Built-in no-op.
fire_due imports claim_job_for_fire/get_job/run_one_job INSIDE the method, so
this commits cleanly before Phase 4C lands claim_job_for_fire (import-time is
unaffected; tests monkeypatch it with raising=False).
Tests: required-surface-unchanged guard, built-in inherits no-op defaults, and
fire_due's three paths (claim+run, lost-claim→no-run, missing-job→no-run).
tests/cron/ green (20 in test_scheduler_provider.py).
---
cron/scheduler_provider.py | 39 +++++++++++++++
tests/cron/test_scheduler_provider.py | 70 +++++++++++++++++++++++++++
2 files changed, 109 insertions(+)
diff --git a/cron/scheduler_provider.py b/cron/scheduler_provider.py
index 45243e7749c..50bca6b892b 100644
--- a/cron/scheduler_provider.py
+++ b/cron/scheduler_provider.py
@@ -71,6 +71,45 @@ class CronScheduler(ABC):
resources (queue consumers, HTTP servers)."""
return None
+ # --- Optional hooks for external providers (added Phase 4). --------------
+ # All default-safe so the built-in inherits working behavior without
+ # overriding. Keep these NON-abstract — see test_abc_growth_stays_additive.
+
+ def on_jobs_changed(self) -> None:
+ """Called after a successful store mutation (create/update/remove/
+ pause/resume). External providers reconcile their registry here (e.g.
+ Chronos re-provisions/cancels the affected one-shot via NAS).
+ Built-in: no-op (it re-reads jobs.json on every tick)."""
+ return None
+
+ def fire_due(self, job_id: str, *, adapters: Any = None, loop: Any = None) -> bool:
+ """Run a single job NOW via the shared orchestrator. Called by the
+ inbound fire webhook when an external scheduler signals a job is due.
+
+ The default claims the job with a store-level compare-and-set
+ (multi-machine at-most-once), then runs it via the shared
+ ``run_one_job`` body. Built-in never calls this (it has its own tick
+ loop); an external provider routes its inbound fire here.
+
+ Returns True if THIS caller claimed and ran the job, False if the claim
+ was lost (another machine/retry won it) or the job no longer exists.
+ """
+ from cron.jobs import claim_job_for_fire, get_job
+ from cron.scheduler import run_one_job
+
+ if not claim_job_for_fire(job_id):
+ return False # another machine already claimed this fire
+ job = get_job(job_id)
+ if job is None:
+ return False # job removed (e.g. repeat-N exhausted) between arm and fire
+ return run_one_job(job, adapters=adapters, loop=loop)
+
+ def reconcile(self) -> None:
+ """Converge the external registry toward jobs.json (the desired state):
+ arm missing one-shots, cancel orphaned ones, re-arm changed times.
+ Built-in: no-op."""
+ return None
+
def resolve_cron_scheduler() -> "CronScheduler":
"""Return the active cron scheduler provider.
diff --git a/tests/cron/test_scheduler_provider.py b/tests/cron/test_scheduler_provider.py
index 8fdbb305a0f..2b2e159e2a3 100644
--- a/tests/cron/test_scheduler_provider.py
+++ b/tests/cron/test_scheduler_provider.py
@@ -262,3 +262,73 @@ def test_resolve_available_provider_is_used(monkeypatch):
monkeypatch.setattr(pc, "load_cron_scheduler", lambda n: Fake())
prov = sp.resolve_cron_scheduler()
assert prov.name == "fake"
+
+
+# ── Phase 4B: additive hooks (on_jobs_changed / fire_due / reconcile) ────────
+
+
+def test_hooks_did_not_change_required_surface():
+ """The additive hooks must NOT become abstractmethods — the Phase-1 guard
+ still holds (required surface is exactly name + start)."""
+ from cron.scheduler_provider import CronScheduler
+
+ assert set(CronScheduler.__abstractmethods__) == {"name", "start"}
+
+
+def test_builtin_inherits_hook_defaults():
+ """The built-in inherits no-op defaults for the new hooks (it never needs
+ to override them)."""
+ from cron.scheduler_provider import InProcessCronScheduler
+
+ p = InProcessCronScheduler()
+ assert p.on_jobs_changed() is None
+ assert p.reconcile() is None
+ # built-in does not override fire_due; it simply isn't called for built-in.
+ assert hasattr(p, "fire_due")
+
+
+def test_fire_due_default_claims_then_runs(monkeypatch):
+ """The default fire_due claims via the store CAS, fetches the job, and runs
+ it through the shared run_one_job body."""
+ import cron.jobs as jobs
+ import cron.scheduler as sched
+ from cron.scheduler_provider import InProcessCronScheduler
+
+ ran = []
+ monkeypatch.setattr(jobs, "claim_job_for_fire", lambda jid: True, raising=False)
+ monkeypatch.setattr(jobs, "get_job", lambda jid: {"id": jid, "name": "t"})
+ monkeypatch.setattr(sched, "run_one_job", lambda job, **kw: ran.append(job["id"]) or True)
+
+ assert InProcessCronScheduler().fire_due("j1") is True
+ assert ran == ["j1"]
+
+
+def test_fire_due_lost_claim_does_not_run(monkeypatch):
+ """If the CAS claim is lost (another machine/retry won), fire_due returns
+ False and never runs the job."""
+ import cron.jobs as jobs
+ import cron.scheduler as sched
+ from cron.scheduler_provider import InProcessCronScheduler
+
+ ran = []
+ monkeypatch.setattr(jobs, "claim_job_for_fire", lambda jid: False, raising=False)
+ monkeypatch.setattr(sched, "run_one_job", lambda job, **kw: ran.append(job["id"]) or True)
+
+ assert InProcessCronScheduler().fire_due("j1") is False
+ assert ran == []
+
+
+def test_fire_due_missing_job_does_not_run(monkeypatch):
+ """If the job vanished between arm and fire (e.g. repeat-N exhausted),
+ fire_due returns False without running."""
+ import cron.jobs as jobs
+ import cron.scheduler as sched
+ from cron.scheduler_provider import InProcessCronScheduler
+
+ ran = []
+ monkeypatch.setattr(jobs, "claim_job_for_fire", lambda jid: True, raising=False)
+ monkeypatch.setattr(jobs, "get_job", lambda jid: None)
+ monkeypatch.setattr(sched, "run_one_job", lambda job, **kw: ran.append(job["id"]) or True)
+
+ assert InProcessCronScheduler().fire_due("gone") is False
+ assert ran == []
From b01eee0c77e182f1c6f9d101c5851fbe4b5efae3 Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 18 Jun 2026 14:34:34 +1000
Subject: [PATCH 008/470] feat(cron): store-level CAS claim for multi-machine
at-most-once fire
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Phase 4C. claim_job_for_fire(job_id, *, claim_ttl_seconds=300) in cron/jobs.py:
under the existing _jobs_lock() file lock, claim a job for a single external
fire so that across N gateway replicas exactly ONE wins. Single-machine
deployments always win (unaffected).
Semantics:
- missing / disabled / paused job → False.
- a fresh fire_claim (younger than claim_ttl_seconds) already present → False
(someone else holds it). Stale claim (crashed winner) → overwrite, so a job
is never wedged forever.
- on win: stamp fire_claim={at, by:_machine_id()}; for recurring (cron/interval)
advance next_run_at (mirrors advance_next_run's at-most-once bump so a stale
re-delivery can't re-fire); one-shots keep next_run_at but the fresh claim
blocks a duplicate retry for the same fire.
- mark_job_run now clears fire_claim on completion so a re-armed recurring job
is claimable again next fire.
_machine_id() (HERMES_MACHINE_ID env, else hostname:pid) is attribution-only;
correctness is the file lock + fresh-claim check, not the id.
This is consumed by CronScheduler.fire_due (Phase 4B). tick is untouched — it
still uses advance_next_run, so the built-in single-machine path is unaffected.
Tests (real store, temp HERMES_HOME): claim-once-then-block + next_run advance,
one-shot no-double-claim, unknown→False, paused→False, stale-claim reclaimable,
mark_job_run clears the claim (recurring re-claimable). tests/cron/ 470 passed.
---
cron/jobs.py | 68 ++++++++++++++++++++++
tests/cron/test_claim_job_for_fire.py | 84 +++++++++++++++++++++++++++
2 files changed, 152 insertions(+)
create mode 100644 tests/cron/test_claim_job_for_fire.py
diff --git a/cron/jobs.py b/cron/jobs.py
index 178bd0fad81..2f44608d649 100644
--- a/cron/jobs.py
+++ b/cron/jobs.py
@@ -976,6 +976,9 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
job["last_error"] = error if not success else None
# Track delivery failures separately — cleared on successful delivery
job["last_delivery_error"] = delivery_error
+ # Clear any external-fire claim so a re-armed recurring job can
+ # be claimed again on its next fire (Phase 4C CAS).
+ job["fire_claim"] = None
# Increment completed count
if job.get("repeat"):
@@ -1057,6 +1060,71 @@ def advance_next_run(job_id: str) -> bool:
return False
+def _machine_id() -> str:
+    """Label identifying who stamped a ``fire_claim`` (debugging aid only).
+
+    ``HERMES_MACHINE_ID`` wins when non-blank; otherwise ``<hostname>:<pid>``.
+    Claim correctness rests on the file lock + fresh-claim check, not on this.
+    """
+    override = os.getenv("HERMES_MACHINE_ID", "").strip()
+    if not override:
+        try:
+            import socket
+            hostname = socket.gethostname()
+        except Exception:
+            hostname = "unknown"
+        return f"{hostname}:{os.getpid()}"
+    return override
+
+
+def claim_job_for_fire(job_id: str, *, claim_ttl_seconds: int = 300) -> bool:
+    """Try to win the exclusive right to run one external fire of ``job_id``.
+
+    This is the store-level compare-and-set behind multi-machine at-most-once
+    firing. It is used by ``CronScheduler.fire_due`` when an external scheduler
+    (Chronos) reports a job due to N gateway replicas: exactly one caller gets
+    ``True``. A single-machine deployment always wins.
+
+    Everything happens while holding ``_jobs_lock()``:
+      * unknown, disabled or paused job → ``False``;
+      * a ``fire_claim`` younger than ``claim_ttl_seconds`` exists → ``False``;
+      * otherwise stamp ``fire_claim = {"at", "by"}`` and persist. Recurring
+        jobs (``cron`` / ``interval``) also get ``next_run_at`` advanced,
+        mirroring ``advance_next_run``; one-shots keep ``next_run_at`` and
+        rely on the fresh claim to block a duplicate retry.
+
+    ``mark_job_run`` clears the claim on completion so the next fire of a
+    recurring job can be claimed. A claim older than the TTL (or one that
+    cannot be parsed) is overwritten, so a machine that crashed between
+    claiming and completing cannot wedge the job forever.
+    """
+    with _jobs_lock():
+        all_jobs = load_jobs()
+        target = next((j for j in all_jobs if j["id"] == job_id), None)
+        if target is None:
+            return False
+        if target.get("state") == "paused" or not target.get("enabled", True):
+            return False
+        now = _hermes_now()
+        prior = target.get("fire_claim")
+        if prior:
+            try:
+                stamped = _ensure_aware(datetime.fromisoformat(prior["at"]))
+                age_seconds = (now - stamped).total_seconds()
+                if age_seconds < claim_ttl_seconds:
+                    return False  # a live claim is held elsewhere
+            except Exception:
+                pass  # unreadable claim → treat as absent and overwrite
+        target["fire_claim"] = {"at": now.isoformat(), "by": _machine_id()}
+        schedule_kind = target.get("schedule", {}).get("kind")
+        if schedule_kind in {"cron", "interval"}:
+            upcoming = compute_next_run(target["schedule"], now.isoformat())
+            if upcoming:
+                target["next_run_at"] = upcoming
+        save_jobs(all_jobs)
+        return True
+
+
def get_due_jobs() -> List[Dict[str, Any]]:
"""Get all jobs that are due to run now.
diff --git a/tests/cron/test_claim_job_for_fire.py b/tests/cron/test_claim_job_for_fire.py
new file mode 100644
index 00000000000..abbe969eb04
--- /dev/null
+++ b/tests/cron/test_claim_job_for_fire.py
@@ -0,0 +1,84 @@
+"""Tests for the store-level CAS fire claim (Phase 4C).
+
+`claim_job_for_fire` gives multi-machine at-most-once semantics when an external
+scheduler (Chronos) fires a job: across N gateway replicas, exactly ONE wins the
+claim for a given fire. Single-machine deployments always win (unaffected).
+
+These exercise the real store against a temp HERMES_HOME (no mocks) per the
+E2E-over-mocks discipline for file-touching code.
+"""
+import pytest
+
+
+@pytest.fixture
+def temp_home(tmp_path, monkeypatch):
+ """Isolated HERMES_HOME so jobs.json doesn't touch the real store."""
+ monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+ # cron.jobs caches no home at import; get_hermes_home() reads the env live.
+ yield tmp_path
+
+
+def test_claim_succeeds_once_then_blocks(temp_home):
+ """First claim for a fire wins; a second claim for the same fire loses, and
+ next_run_at is advanced (a re-delivery for the old time can't re-fire)."""
+ from cron.jobs import create_job, claim_job_for_fire, get_job
+
+ job = create_job(prompt="x", schedule="every 5m", name="t")
+ jid = job["id"]
+ before = get_job(jid)["next_run_at"]
+
+ assert claim_job_for_fire(jid) is True
+ assert claim_job_for_fire(jid) is False
+ assert get_job(jid)["next_run_at"] != before
+
+
+def test_claim_oneshot_cannot_be_double_claimed(temp_home):
+ """A one-shot can't be double-claimed (the fresh claim blocks the retry)."""
+ from cron.jobs import create_job, claim_job_for_fire
+
+ job = create_job(prompt="x", schedule="30m", name="o")
+ assert claim_job_for_fire(job["id"]) is True
+ assert claim_job_for_fire(job["id"]) is False
+
+
+def test_claim_unknown_job_returns_false(temp_home):
+ from cron.jobs import claim_job_for_fire
+
+ assert claim_job_for_fire("nope-does-not-exist") is False
+
+
+def test_claim_paused_job_returns_false(temp_home):
+ """A paused job can't be claimed."""
+ from cron.jobs import create_job, claim_job_for_fire, pause_job
+
+ job = create_job(prompt="x", schedule="every 5m", name="p")
+ pause_job(job["id"])
+ assert claim_job_for_fire(job["id"]) is False
+
+
+def test_stale_claim_is_reclaimable(temp_home):
+    """A stale claim is overwritten — the fire isn't stuck forever if the
+    winning machine crashed before mark_job_run cleared it (forced via TTL=0)."""
+    from cron.jobs import create_job, claim_job_for_fire
+
+    job = create_job(prompt="x", schedule="every 5m", name="s")
+    jid = job["id"]
+    assert claim_job_for_fire(jid) is True
+    # With a 0s TTL, the existing claim is always considered stale.
+    assert claim_job_for_fire(jid, claim_ttl_seconds=0) is True
+
+
+def test_mark_job_run_clears_claim(temp_home):
+ """After a recurring job completes, its claim is cleared so the next fire
+ can be claimed again."""
+ from cron.jobs import create_job, claim_job_for_fire, mark_job_run, get_job
+
+ job = create_job(prompt="x", schedule="every 5m", name="c")
+ jid = job["id"]
+ assert claim_job_for_fire(jid) is True
+ assert get_job(jid).get("fire_claim") is not None
+
+ mark_job_run(jid, success=True)
+ assert get_job(jid).get("fire_claim") is None
+ # …and the re-armed recurring job is claimable again.
+ assert claim_job_for_fire(jid) is True
From 4c8bbe6416966fccc8663be0c4049121d2af5f07 Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 18 Jun 2026 14:40:56 +1000
Subject: [PATCH 009/470] feat(cron): Chronos NAS-mediated managed-cron
provider (scale-to-zero)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Phase 4D. The first non-default CronScheduler: plugins/cron/chronos/. Inert
unless cron.provider=chronos; resolve_cron_scheduler falls back to the built-in
if unavailable, so cron never loses its trigger.
Files:
- chronos/__init__.py — ChronosCronScheduler + register(ctx).
* is_available(): config-only, NO network (portal_url + callback_url + a
stored Nous access token via get_provider_auth_state). Returns False →
resolver falls back to built-in.
* start(): reconcile() then RETURN — no blocking loop, no 60s wake (DQ-1:
this is what makes scale-to-zero real; the machine wakes only on a
NAS→agent fire).
* _arm_one_shot(job): POST NAS provision {job_id, fire_at, agent_callback_url,
dedup_key=job_id:fire_at}. Agent owns the time → sub-minute fires survive
(no scheduler 1-minute floor).
* reconcile(): converge NAS arms toward jobs.json — arm missing/changed-time,
cancel orphaned, skip paused. Cold process rebuilds from jobs.json +
idempotent dedup_key.
* on_jobs_changed(): reconcile (re-arm/cancel the affected one-shot).
* fire_due(): ABC default (CAS claim + run_one_job) THEN re-arm the next
one-shot. Job gone (one-shot done / repeat-N exhausted) → no re-arm.
- chronos/_nas_client.py — thin HTTP wrapper for provision/cancel/list using
the agent's existing refresh-aware Nous token (resolve_nous_access_token).
Names no scheduler vendor; holds no scheduler creds.
- chronos/plugin.yaml — discovery metadata.
INVARIANT: zero "qstash"/"upstash" hits in plugins/cron, gateway, hermes_cli,
website/docs — the external scheduler is a NAS-internal detail, never named
agent-side.
Tests (13, all NAS mocked, zero network): is_available off-without-config +
on-with-config + makes-no-network; arm payload incl. sub-minute + noop without
next_run; reconcile arms-all / cancels-orphan / skips-paused / skips-already-
armed; fire_due re-arms next / no re-arm when job gone / no re-arm when claim
lost.
---
plugins/cron/chronos/__init__.py | 241 ++++++++++++++++++++++++++++
plugins/cron/chronos/_nas_client.py | 123 ++++++++++++++
plugins/cron/chronos/plugin.yaml | 9 ++
tests/plugins/test_chronos_cron.py | 203 +++++++++++++++++++++++
4 files changed, 576 insertions(+)
create mode 100644 plugins/cron/chronos/__init__.py
create mode 100644 plugins/cron/chronos/_nas_client.py
create mode 100644 plugins/cron/chronos/plugin.yaml
create mode 100644 tests/plugins/test_chronos_cron.py
diff --git a/plugins/cron/chronos/__init__.py b/plugins/cron/chronos/__init__.py
new file mode 100644
index 00000000000..1ec5a457763
--- /dev/null
+++ b/plugins/cron/chronos/__init__.py
@@ -0,0 +1,241 @@
+"""Chronos — NAS-mediated managed cron provider (scale-to-zero).
+
+Chronos (the Greek god of time, alongside Hermes) is the first non-default
+``CronScheduler``. It lets a hosted gateway scale to zero while idle and still
+fire cron jobs: instead of a 60s in-process ticker, it asks NAS to arm exactly
+one external one-shot per job at that job's real next-fire time. NAS calls the
+agent back at fire time over an authenticated webhook (``/api/cron/fire``); the
+agent runs the job via the shared ``run_one_job`` body and re-arms the next
+one-shot.
+
+The external scheduler NAS uses is an internal NAS implementation detail —
+Chronos names no vendor, holds no scheduler credentials, and speaks only to
+NAS's ``agent-cron`` endpoints with the agent's existing Nous token.
+
+Design constraints (see the plan's DQ-1):
+ - start() arms all enabled jobs and RETURNS; it never blocks and never spawns
+ a periodic wake. Between fires the machine is truly at zero.
+ - reconcile runs only on a warm process (start / on_jobs_changed / piggybacked
+ on a fire), never as a periodic wake of a sleeping machine.
+
+Inert unless ``cron.provider: chronos``. ``resolve_cron_scheduler`` falls back
+to the built-in if Chronos is unavailable, so cron never loses its trigger.
+
+Wire contract: ``docs/chronos-managed-cron-contract.md``.
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+from typing import Any, Dict, Optional
+
+from cron.scheduler_provider import CronScheduler
+
+logger = logging.getLogger("cron.chronos")
+
+
+def _cfg(*keys: str, default: Any = "") -> Any:
+    """Look up a nested config value by key path; ``default`` on any failure (no network)."""
+    try:
+        from hermes_cli import config as hermes_config
+        return hermes_config.cfg_get(hermes_config.load_config(), *keys, default=default)
+    except Exception:
+        return default
+
+
+class ChronosCronScheduler(CronScheduler):
+    """NAS-mediated external cron provider."""
+
+    def __init__(self) -> None:
+        # In-memory map of job_id → fire_at we've asked NAS to arm. Best-effort
+        # cache; reconcile rebuilds desired state from jobs.json, so a cold
+        # process simply re-arms (idempotent via dedup_key).
+        self._armed: Dict[str, str] = {}
+        self._lock = threading.Lock()
+        self._client = None  # lazily constructed (no network in is_available)
+
+    # -- identity / availability -----------------------------------------
+
+    @property
+    def name(self) -> str:
+        return "chronos"
+
+    def is_available(self) -> bool:
+        """Config presence only — NO network.
+
+        Chronos needs a portal base URL, the agent's own publicly-reachable
+        callback URL (for NAS→agent fires), and a usable Nous token (the agent
+        is logged into the portal). If any is missing, resolve_cron_scheduler
+        falls back to the built-in ticker.
+        """
+        if not (_cfg("cron", "chronos", "portal_url") and _cfg("cron", "chronos", "callback_url")):
+            return False
+        return self._have_nous_token()
+
+    def _have_nous_token(self) -> bool:
+        """True if the agent has a Nous Portal login (no network call).
+
+        Checks the stored auth state for a Nous access token — does NOT refresh
+        or hit the network (is_available must stay offline). The actual
+        refresh-aware token is resolved lazily at provision time.
+        """
+        try:
+            from hermes_cli.auth import get_provider_auth_state
+            state = get_provider_auth_state("nous") or {}
+            return bool(state.get("access_token"))
+        except Exception:
+            return False
+
+    # -- client -----------------------------------------------------------
+
+    def _get_client(self):
+        if self._client is None:
+            from ._nas_client import NasCronClient
+            self._client = NasCronClient(_cfg("cron", "chronos", "portal_url"))
+        return self._client
+
+    def _callback_url(self) -> str:
+        return str(_cfg("cron", "chronos", "callback_url") or "")
+
+    # -- lifecycle --------------------------------------------------------
+
+    def start(self, stop_event, *, adapters=None, loop=None, interval=60):
+        """Arm all enabled jobs via NAS, then RETURN immediately.
+
+        Does NOT block and does NOT spawn a 60s wake (DQ-1) — that is the whole
+        point of scale-to-zero. The machine wakes only on a NAS→agent fire.
+        """
+        try:
+            self.reconcile()
+        except Exception as e:
+            logger.warning("Chronos start() reconcile failed: %s", e)
+        # Intentionally return — no loop, no periodic wake.
+
+    def stop(self) -> None:
+        return None
+
+    def on_jobs_changed(self) -> None:
+        """A job was created/updated/removed/paused/resumed — reconcile the NAS
+        registry so the affected one-shot is (re-)armed or cancelled."""
+        try:
+            self.reconcile()
+        except Exception as e:
+            logger.debug("Chronos on_jobs_changed reconcile failed: %s", e)
+
+    # -- arming -----------------------------------------------------------
+
+    def _arm_one_shot(self, job: Dict[str, Any]) -> None:
+        """Ask NAS to arm exactly one one-shot at the job's next_run_at.
+
+        The agent computes the time; NAS+its scheduler are the dumb executor.
+        Idempotent per (job_id, fire_at) via dedup_key, so re-arming the same
+        fire is a no-op NAS-side.
+        """
+        job_id = job["id"]
+        fire_at = job.get("next_run_at")
+        if not fire_at:
+            return
+        dedup_key = f"{job_id}:{fire_at}"
+        self._get_client().provision(
+            job_id=job_id,
+            fire_at=fire_at,
+            agent_callback_url=self._callback_url(),
+            dedup_key=dedup_key,
+        )
+        with self._lock:
+            self._armed[job_id] = fire_at
+
+    def _cancel(self, job_id: str) -> None:
+        try:
+            self._get_client().cancel(job_id=job_id)
+        finally:
+            with self._lock:
+                self._armed.pop(job_id, None)
+
+    def _list_armed(self) -> Dict[str, str]:
+        """Observed armed one-shots: job_id → fire_at.
+
+        Prefer the in-memory map (warm process); on a cold/empty map, ask NAS
+        (best-effort). If NAS list fails, return what we have — reconcile then
+        re-arms desired jobs idempotently.
+        """
+        with self._lock:
+            if self._armed:
+                return dict(self._armed)
+        try:
+            observed = {
+                item["job_id"]: item.get("fire_at", "")
+                for item in self._get_client().list_armed()
+                if item.get("job_id")
+            }
+            with self._lock:
+                self._armed.update(observed)
+            return observed
+        except Exception as e:
+            logger.debug("Chronos _list_armed failed (will re-arm idempotently): %s", e)
+            return {}
+
+    # -- reconcile --------------------------------------------------------
+
+    def reconcile(self) -> None:
+        """Converge the NAS-armed one-shots toward jobs.json (desired state): arm
+        missing / re-arm changed-time, cancel orphaned (incl. paused/disabled)."""
+        from cron.jobs import get_job, load_jobs
+
+        desired: Dict[str, str] = {
+            j["id"]: j["next_run_at"]
+            for j in load_jobs()
+            # "enabled" defaults to True, matching cron.jobs.claim_job_for_fire.
+            if j.get("enabled", True) and j.get("next_run_at") and j.get("state") != "paused"
+        }
+        observed = self._list_armed()
+
+        # Arm missing or changed-time.
+        for job_id, fire_at in desired.items():
+            if observed.get(job_id) != fire_at:
+                # Re-fetch the full job dict to arm (need the whole record).
+                job = get_job(job_id)
+                if job:
+                    try:
+                        self._arm_one_shot(job)
+                    except Exception as e:
+                        logger.warning("Chronos failed to arm job %s: %s", job_id, e)
+
+        # Cancel orphans (armed but no longer desired).
+        for job_id in list(observed.keys()):
+            if job_id not in desired:
+                try:
+                    self._cancel(job_id)
+                except Exception as e:
+                    logger.warning("Chronos failed to cancel orphan %s: %s", job_id, e)
+
+    # -- fire -------------------------------------------------------------
+
+    def fire_due(self, job_id: str, *, adapters: Any = None, loop: Any = None) -> bool:
+        """Run the due job (claim + run_one_job via the ABC default), then re-arm
+        the NEXT one-shot through NAS — after the run, so next_run_at reflects
+        the completed fire. No re-arm when the claim was lost, the job is gone
+        (one-shot completed / repeat-N exhausted → get_job returns None), or the
+        job is now disabled/paused — the same "desired" rule reconcile() uses.
+        """
+        ran = super().fire_due(job_id, adapters=adapters, loop=loop)
+        if ran:
+            from cron.jobs import get_job
+            job = get_job(job_id)
+            rearm = bool(job) and job.get("enabled", True) and job.get("state") != "paused"
+            if rearm and job.get("next_run_at"):
+                try:
+                    self._arm_one_shot(job)
+                except Exception as e:
+                    logger.warning("Chronos failed to re-arm job %s after fire: %s", job_id, e)
+        return ran
+
+
+def register(ctx) -> None:
+    """Plugin entrypoint: hand a fresh Chronos provider to the plugin loader.
+
+    Same shape as the memory plugins; plugins/cron discovery invokes this.
+    """
+    provider = ChronosCronScheduler()
+    ctx.register_cron_scheduler(provider)
diff --git a/plugins/cron/chronos/_nas_client.py b/plugins/cron/chronos/_nas_client.py
new file mode 100644
index 00000000000..04382adc8ea
--- /dev/null
+++ b/plugins/cron/chronos/_nas_client.py
@@ -0,0 +1,123 @@
+"""Thin HTTP client for the agent → NAS ``agent-cron`` endpoints (Chronos).
+
+The Chronos provider speaks ONLY to NAS — it names no scheduler vendor and
+holds no scheduler credentials. NAS owns the external scheduler (an internal
+implementation detail) and that scheduler's account; the agent just asks NAS to
+"arm a one-shot at time T" / "cancel" / "list", authenticated with the agent's
+existing Nous Portal access token (the same token it already uses to call the
+portal — no new secret).
+
+Wire contract: ``docs/chronos-managed-cron-contract.md``.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger("cron.chronos")
+
+# Endpoint paths under the portal base URL.
+_PROVISION_PATH = "/api/agent-cron/provision"
+_CANCEL_PATH = "/api/agent-cron/cancel"
+_LIST_PATH = "/api/agent-cron/list"
+
+
+class NasCronClientError(RuntimeError):
+ """Raised when a NAS agent-cron call fails (non-2xx or transport error)."""
+
+
+class NasCronClient:
+    """Small HTTP client for the three agent→NAS ``agent-cron`` calls.
+
+    Every request carries the agent's refresh-aware Nous access token. No
+    scheduler vendor or scheduler credential appears here — NAS owns those.
+    """
+
+    def __init__(self, portal_url: str, *, timeout_seconds: float = 15.0) -> None:
+        self.timeout_seconds = timeout_seconds
+        self.portal_url = portal_url.rstrip("/")  # endpoint paths start with "/"
+
+    # -- auth -------------------------------------------------------------
+
+    def _access_token(self) -> str:
+        """Resolve the agent's existing Nous Portal access token (refresh-aware)."""
+        from hermes_cli import auth as hermes_auth
+        return hermes_auth.resolve_nous_access_token()
+
+    def _headers(self) -> Dict[str, str]:
+        """Bearer-auth JSON headers; the access token is resolved on every call."""
+        bearer = f"Bearer {self._access_token()}"
+        headers = {"Authorization": bearer, "Content-Type": "application/json"}
+        return headers
+
+    # -- HTTP -------------------------------------------------------------
+
+    def _send(self, verb: str, call, path: str, **payload) -> Dict[str, Any]:
+        """Shared request/response handling for ``_post`` and ``_get``.
+
+        ``call`` is ``requests.post`` / ``requests.get``; ``verb`` only labels
+        error messages. Any error raised while building headers or sending,
+        and any non-2xx status, becomes ``NasCronClientError``; an empty or
+        non-JSON 2xx body yields ``{}``.
+        """
+        try:
+            resp = call(
+                f"{self.portal_url}{path}",
+                headers=self._headers(),
+                timeout=self.timeout_seconds,
+                **payload,
+            )
+        except Exception as e:
+            raise NasCronClientError(f"{verb} {path} failed: {e}") from e
+        if resp.status_code // 100 != 2:
+            raise NasCronClientError(
+                f"{verb} {path} returned {resp.status_code}: {resp.text[:200]}"
+            )
+        try:
+            return resp.json() if resp.content else {}
+        except Exception:
+            return {}
+
+    def _post(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]:
+        """POST ``body`` as JSON to ``path`` under the portal base URL."""
+        import requests  # lazy: agent already depends on requests
+
+        return self._send("POST", requests.post, path, json=body)
+
+    def _get(self, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
+        """GET ``path`` under the portal base URL with query ``params``."""
+        import requests
+
+        return self._send("GET", requests.get, path, params=params)
+
+    # -- endpoints --------------------------------------------------------
+
+    def provision(self, *, job_id: str, fire_at: str, agent_callback_url: str,
+                  dedup_key: str) -> Dict[str, Any]:
+        """Arm a NAS one-shot that fires ``job_id`` at ``fire_at`` (ISO 8601).
+
+        Idempotent NAS-side per ``dedup_key``; returns the NAS response dict.
+        """
+        request_body = {
+            "job_id": job_id,
+            "fire_at": fire_at,
+            "agent_callback_url": agent_callback_url,
+            "dedup_key": dedup_key,
+        }
+        return self._post(_PROVISION_PATH, request_body)
+
+    def cancel(self, *, job_id: str) -> Dict[str, Any]:
+        """Cancel whatever one-shot NAS currently has armed for ``job_id``."""
+        return self._post(_CANCEL_PATH, {"job_id": job_id})
+
+    def list_armed(self) -> List[Dict[str, Any]]:
+        """Fetch the one-shots NAS reports as armed for this agent.
+
+        Returns the response's ``armed`` list of ``{job_id, fire_at,
+        schedule_id}`` dicts; any other response shape gives ``[]``.
+        """
+        payload = self._get(_LIST_PATH, {})
+        if isinstance(payload, dict) and isinstance(payload.get("armed"), list):
+            return payload["armed"]
+        return []
diff --git a/plugins/cron/chronos/plugin.yaml b/plugins/cron/chronos/plugin.yaml
new file mode 100644
index 00000000000..aad48b35655
--- /dev/null
+++ b/plugins/cron/chronos/plugin.yaml
@@ -0,0 +1,9 @@
+name: chronos
+description: >-
+ Chronos — NAS-mediated managed cron provider for scale-to-zero hosted agents.
+ Delegates the "wake me at time T" trigger to Nous infrastructure so an idle
+ gateway can scale to zero and still fire cron jobs. The agent computes each
+ job's next-fire time and asks NAS to arm a one-shot; NAS calls the agent back
+ at fire time over an authenticated webhook. Inert unless cron.provider=chronos.
+version: 1.0.0
+author: Nous Research
diff --git a/tests/plugins/test_chronos_cron.py b/tests/plugins/test_chronos_cron.py
new file mode 100644
index 00000000000..36b32f7a501
--- /dev/null
+++ b/tests/plugins/test_chronos_cron.py
@@ -0,0 +1,203 @@
+"""Unit tests for the Chronos NAS-mediated cron provider (Phase 4D).
+
+All NAS calls are mocked — ZERO live network. These prove:
+ - is_available is config-only (no network), false without config.
+ - one-shot arming sends the right provision payload (incl. sub-minute fires —
+ the agent owns the time, so there's no 1-minute floor).
+ - reconcile arms missing, cancels orphaned, skips paused.
+ - fire_due re-arms the next one-shot after a successful run, and repeat-N
+ (job gone) stops re-arming.
+"""
+
+import pytest
+
+
+@pytest.fixture
+def temp_home(tmp_path, monkeypatch):
+ monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+ yield tmp_path
+
+
+@pytest.fixture
+def chronos(monkeypatch):
+ """A ChronosCronScheduler with a fake NAS client capturing calls."""
+ from plugins.cron.chronos import ChronosCronScheduler
+
+ class FakeClient:
+ def __init__(self):
+ self.provisions = []
+ self.cancels = []
+ self._armed = []
+
+ def provision(self, *, job_id, fire_at, agent_callback_url, dedup_key):
+ self.provisions.append({
+ "job_id": job_id, "fire_at": fire_at,
+ "agent_callback_url": agent_callback_url, "dedup_key": dedup_key,
+ })
+ return {"schedule_id": f"sched-{job_id}"}
+
+ def cancel(self, *, job_id):
+ self.cancels.append(job_id)
+ return {}
+
+ def list_armed(self):
+ return list(self._armed)
+
+ prov = ChronosCronScheduler()
+ fake = FakeClient()
+ prov._client = fake
+ # callback_url is read via _cfg; patch the module helper to avoid config.
+ monkeypatch.setattr("plugins.cron.chronos._cfg",
+ lambda *k, default="": "https://agent.example/" if k[-1] == "callback_url" else "https://portal.test")
+ return prov, fake
+
+
+# -- is_available -------------------------------------------------------------
+
+def test_is_available_false_without_config(temp_home, monkeypatch):
+ from plugins.cron.chronos import ChronosCronScheduler
+
+ monkeypatch.setattr("plugins.cron.chronos._cfg", lambda *k, default="": "")
+ assert ChronosCronScheduler().is_available() is False
+
+
+def test_is_available_true_with_config_and_token(temp_home, monkeypatch):
+ import plugins.cron.chronos as mod
+ from plugins.cron.chronos import ChronosCronScheduler
+
+ monkeypatch.setattr(mod, "_cfg", lambda *k, default="": "https://x" )
+ monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state",
+ lambda pid: {"access_token": "tok"})
+ assert ChronosCronScheduler().is_available() is True
+
+
+def test_is_available_makes_no_network(temp_home, monkeypatch):
+ """is_available must not construct the NAS client / hit network."""
+ import plugins.cron.chronos as mod
+ from plugins.cron.chronos import ChronosCronScheduler
+
+ monkeypatch.setattr(mod, "_cfg", lambda *k, default="": "https://x")
+ monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state",
+ lambda pid: {"access_token": "tok"})
+ p = ChronosCronScheduler()
+
+ def explode():
+ raise AssertionError("is_available must not build the NAS client")
+
+ monkeypatch.setattr(p, "_get_client", explode)
+ assert p.is_available() is True # did not call _get_client
+
+
+# -- arming -------------------------------------------------------------------
+
+def test_arm_one_shot_sends_provision(chronos):
+ prov, fake = chronos
+ prov._arm_one_shot({"id": "j1", "next_run_at": "2026-06-18T12:00:00+00:00"})
+
+ assert len(fake.provisions) == 1
+ p = fake.provisions[0]
+ assert p["job_id"] == "j1"
+ assert p["fire_at"] == "2026-06-18T12:00:00+00:00"
+ assert p["dedup_key"] == "j1:2026-06-18T12:00:00+00:00"
+ assert p["agent_callback_url"] == "https://agent.example/"
+
+
+def test_arm_one_shot_preserves_sub_minute_fire(chronos):
+ """Sub-minute fire times survive — the agent owns the time, so there's no
+ 1-minute scheduler floor."""
+ prov, fake = chronos
+ prov._arm_one_shot({"id": "j2", "next_run_at": "2026-06-18T12:00:30+00:00"})
+ assert fake.provisions[0]["fire_at"] == "2026-06-18T12:00:30+00:00"
+
+
+def test_arm_one_shot_noop_without_next_run(chronos):
+ prov, fake = chronos
+ prov._arm_one_shot({"id": "j3", "next_run_at": None})
+ assert fake.provisions == []
+
+
+# -- reconcile ----------------------------------------------------------------
+
+def test_reconcile_arms_all_enabled(temp_home, chronos, monkeypatch):
+ prov, fake = chronos
+ jobs = [
+ {"id": "a", "enabled": True, "next_run_at": "2026-06-18T12:00:00+00:00", "state": "scheduled"},
+ {"id": "b", "enabled": True, "next_run_at": "2026-06-18T12:05:00+00:00", "state": "scheduled"},
+ ]
+ monkeypatch.setattr("cron.jobs.load_jobs", lambda: jobs)
+ monkeypatch.setattr("cron.jobs.get_job", lambda jid: next(j for j in jobs if j["id"] == jid))
+
+ prov.reconcile()
+ assert {p["job_id"] for p in fake.provisions} == {"a", "b"}
+ assert fake.cancels == []
+
+
+def test_reconcile_cancels_orphan_arms_desired(temp_home, chronos, monkeypatch):
+    prov, fake = chronos
+    # Seed the provider's in-memory arm cache with deleted job "gone" (fake NAS list stays empty).
+    prov._armed = {"gone": "2026-06-18T11:00:00+00:00"}
+    jobs = [{"id": "a", "enabled": True, "next_run_at": "2026-06-18T12:00:00+00:00", "state": "scheduled"}]
+    monkeypatch.setattr("cron.jobs.load_jobs", lambda: jobs)
+    monkeypatch.setattr("cron.jobs.get_job", lambda jid: next((j for j in jobs if j["id"] == jid), None))
+
+    prov.reconcile()
+    assert [p["job_id"] for p in fake.provisions] == ["a"]
+    assert fake.cancels == ["gone"]
+
+
+def test_reconcile_skips_paused(temp_home, chronos, monkeypatch):
+ prov, fake = chronos
+ jobs = [{"id": "p", "enabled": True, "next_run_at": "2026-06-18T12:00:00+00:00", "state": "paused"}]
+ monkeypatch.setattr("cron.jobs.load_jobs", lambda: jobs)
+ monkeypatch.setattr("cron.jobs.get_job", lambda jid: next((j for j in jobs if j["id"] == jid), None))
+
+ prov.reconcile()
+ assert fake.provisions == []
+
+
+def test_reconcile_skips_already_armed_same_time(temp_home, chronos, monkeypatch):
+ prov, fake = chronos
+ prov._armed = {"a": "2026-06-18T12:00:00+00:00"}
+ jobs = [{"id": "a", "enabled": True, "next_run_at": "2026-06-18T12:00:00+00:00", "state": "scheduled"}]
+ monkeypatch.setattr("cron.jobs.load_jobs", lambda: jobs)
+ monkeypatch.setattr("cron.jobs.get_job", lambda jid: jobs[0])
+
+ prov.reconcile()
+ assert fake.provisions == [] # already armed at the same time → no re-arm
+
+
+# -- fire_due re-arm ----------------------------------------------------------
+
+def test_fire_due_rearms_next_oneshot(chronos, monkeypatch):
+ prov, fake = chronos
+ # super().fire_due runs the job; stub the ABC default to "ran".
+ monkeypatch.setattr("cron.scheduler_provider.CronScheduler.fire_due",
+ lambda self, jid, **kw: True)
+ monkeypatch.setattr("cron.jobs.get_job",
+ lambda jid: {"id": jid, "enabled": True, "next_run_at": "2026-06-18T12:05:00+00:00"})
+
+ assert prov.fire_due("j1") is True
+ assert [p["job_id"] for p in fake.provisions] == ["j1"]
+ assert fake.provisions[0]["fire_at"] == "2026-06-18T12:05:00+00:00"
+
+
+def test_fire_due_no_rearm_when_job_gone(chronos, monkeypatch):
+ """repeat-N exhausted / one-shot completed → mark_job_run deleted the job →
+ get_job None → no re-arm (the schedule stops cleanly)."""
+ prov, fake = chronos
+ monkeypatch.setattr("cron.scheduler_provider.CronScheduler.fire_due",
+ lambda self, jid, **kw: True)
+ monkeypatch.setattr("cron.jobs.get_job", lambda jid: None)
+
+ assert prov.fire_due("j1") is True
+ assert fake.provisions == []
+
+
+def test_fire_due_no_rearm_when_claim_lost(chronos, monkeypatch):
+ """If the run didn't happen (claim lost), don't re-arm."""
+ prov, fake = chronos
+ monkeypatch.setattr("cron.scheduler_provider.CronScheduler.fire_due",
+ lambda self, jid, **kw: False)
+
+ assert prov.fire_due("j1") is False
+ assert fake.provisions == []
From 3fc7b624d860aca1004155cbe8a09a083bbef30a Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 18 Jun 2026 14:46:33 +1000
Subject: [PATCH 010/470] feat(cron,gateway): NAS-JWT fire verifier +
/api/cron/fire webhook (Chronos)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Phase 4E (E.1 + E.2). The inbound side of Chronos: NAS POSTs the agent when a
one-shot fires; the agent verifies a NAS-minted JWT and runs the job.
E.1 — plugins/cron/chronos/verify.py:
- verify_nas_fire_token(token, expected_audience, jwks_or_key, issuer): verifies
signature against the NAS JWKS (RS/ES family; symmetric rejected), aud == this
agent, exp/nbf, iss, and purpose == "cron_fire" (so a general agent JWT can't
be replayed against the fire endpoint). Returns claims or None; never raises.
Crypto delegated to PyJWT[crypto] (already a declared dep) — no hand-rolled
JWT, no new dependency. No key configured → refuse (never unsigned-decode a
security boundary).
- get_fire_verifier(): pluggable indirection so the DQ-4 escape hatch
(direct per-job cron-key) can swap in with no handler change.
E.2 — gateway/platforms/api_server.py:
- POST /api/cron/fire (registered only when _CRON_AVAILABLE). Authenticated by
the NAS-JWT via get_fire_verifier() — NOT API_SERVER_KEY (NAS holds no API
key; this is the only inbound that triggers remote job execution, so it gets
its own purpose-scoped check). Verifier args come from cron.chronos.* config.
401 on bad/missing/forged token. 400 on missing job_id. On success: 202 +
fire_due runs in the background (so a long agent turn never trips NAS's HTTP
timeout); the store CAS claim inside fire_due de-dupes a scheduler retry.
Tests:
- test_chronos_verify (11): REAL RS256 signing — valid→claims, wrong-aud,
missing/wrong purpose, expired, wrong-iss, tampered-signature (attacker key),
no-key-refuse, empty-token, JWKS-URL key resolution, get_fire_verifier.
- test_cron_fire_webhook (5): valid→202+fire, invalid→401+no-fire, missing
token→401, missing job_id→400, and fire path does NOT require API_SERVER_KEY.
api_server regression suites (214) green.
E.3 (NAS endpoints) is a separate cross-repo PR; the wire contract lands next
(docs/chronos-managed-cron-contract.md).
---
gateway/platforms/api_server.py | 63 ++++++++
plugins/cron/chronos/verify.py | 103 ++++++++++++++
tests/gateway/test_cron_fire_webhook.py | 152 ++++++++++++++++++++
tests/plugins/test_chronos_verify.py | 182 ++++++++++++++++++++++++
4 files changed, 500 insertions(+)
create mode 100644 plugins/cron/chronos/verify.py
create mode 100644 tests/gateway/test_cron_fire_webhook.py
create mode 100644 tests/plugins/test_chronos_verify.py
diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py
index da86952a09d..c657f4b4c6d 100644
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@@ -3342,6 +3342,64 @@ class APIServerAdapter(BasePlatformAdapter):
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
+ async def _handle_cron_fire(self, request: "web.Request") -> "web.Response":
+ """POST /api/cron/fire — Chronos managed-cron fire webhook (NAS → agent).
+
+ Authenticated by a NAS-minted JWT (verified via the pluggable
+ fire-verifier), NOT API_SERVER_KEY — NAS holds no API server key, and
+ this is the only inbound that can trigger remote job execution, so it
+ gets its own purpose-scoped token check.
+
+ Returns 202 + runs the job in the background so a long agent turn never
+ trips NAS's HTTP timeout. The store CAS claim inside fire_due guards
+ against double-fire on a NAS/scheduler retry.
+ """
+ from hermes_cli.config import cfg_get, load_config
+ from plugins.cron.chronos.verify import get_fire_verifier
+
+ auth = request.headers.get("Authorization", "")
+ token = auth[7:].strip() if auth.startswith("Bearer ") else ""
+
+ cfg = load_config()
+ claims = get_fire_verifier()(
+ token=token,
+ expected_audience=cfg_get(cfg, "cron", "chronos", "expected_audience", default=""),
+ jwks_or_key=cfg_get(cfg, "cron", "chronos", "nas_jwks_url", default="") or None,
+ issuer=cfg_get(cfg, "cron", "chronos", "portal_url", default="") or None,
+ )
+ if claims is None:
+ logger.warning(
+ "cron fire: rejected invalid token: %s",
+ self._request_audit_log_suffix(request),
+ )
+ return web.json_response({"error": "invalid fire token"}, status=401)
+
+ try:
+ body = await request.json()
+ except Exception:
+ body = {}
+ job_id = (body or {}).get("job_id")
+ if not job_id:
+ return web.json_response({"error": "missing job_id"}, status=400)
+
+ from cron.scheduler_provider import resolve_cron_scheduler
+ provider = resolve_cron_scheduler()
+
+ loop = asyncio.get_running_loop()
+ # Fire in the background (202 immediately). fire_due claims via the
+ # store CAS, so a retry while this is in flight is de-duped.
+ task = asyncio.create_task(
+ asyncio.to_thread(provider.fire_due, job_id, adapters=None, loop=loop)
+ )
+ try:
+ self._background_tasks.add(task)
+ task.add_done_callback(self._background_tasks.discard)
+ except (TypeError, AttributeError):
+ pass
+
+ return web.json_response({"status": "accepted", "job_id": job_id}, status=202)
+
+
# ------------------------------------------------------------------
# Output extraction helper
# ------------------------------------------------------------------
@@ -4196,6 +4254,11 @@ class APIServerAdapter(BasePlatformAdapter):
self._app.router.add_post("/api/jobs/{job_id}/pause", self._handle_pause_job)
self._app.router.add_post("/api/jobs/{job_id}/resume", self._handle_resume_job)
self._app.router.add_post("/api/jobs/{job_id}/run", self._handle_run_job)
+
+ # Chronos managed-cron fire webhook (NAS → agent). Authenticated by a
+ # NAS-minted JWT (NOT API_SERVER_KEY), so it has its own auth path.
+ if _CRON_AVAILABLE:
+ self._app.router.add_post("/api/cron/fire", self._handle_cron_fire)
# Structured event streaming
self._app.router.add_post("/v1/runs", self._handle_runs)
self._app.router.add_get("/v1/runs/{run_id}", self._handle_get_run)
diff --git a/plugins/cron/chronos/verify.py b/plugins/cron/chronos/verify.py
new file mode 100644
index 00000000000..99c8db93e4b
--- /dev/null
+++ b/plugins/cron/chronos/verify.py
@@ -0,0 +1,103 @@
+"""Inbound cron-fire token verification for Chronos (Phase 4E.1).
+
+When NAS relays an external scheduler fire to the agent, it POSTs
+``/api/cron/fire`` with a short-lived NAS-minted JWT. This module verifies that
+JWT before any job runs — the security boundary for remotely-triggered job
+execution.
+
+We verify a NAS-minted JWT (the trust path the agent already has) rather than
+let an external scheduler call the agent directly: the scheduler signs with
+NAS's keys, which the agent doesn't (and shouldn't) hold. See the plan's DQ-4.
+
+The verifier is pluggable (``get_fire_verifier``) so the escape-hatch mode
+(direct per-job cron-key) can swap in later with no handler change.
+
+Crypto is delegated to PyJWT (already a declared dependency) — we do NOT
+hand-roll JWT verification.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Callable, Dict, Optional
+
+logger = logging.getLogger("cron.chronos.verify")
+
+# The purpose claim that scopes a token to the fire endpoint. A general agent
+# JWT (without this claim) must NOT be replayable against /api/cron/fire.
+_FIRE_PURPOSE = "cron_fire"
+
+
+def verify_nas_fire_token(
+ *,
+ token: str,
+ expected_audience: str,
+ jwks_or_key: Optional[str] = None,
+ issuer: Optional[str] = None,
+ leeway_seconds: int = 30,
+) -> Optional[Dict[str, Any]]:
+ """Verify a NAS-minted cron-fire JWT. Return decoded claims, or None.
+
+ Checks (all must pass):
+ - signature against the NAS public key (``jwks_or_key`` is a JWKS URL or an
+ inline PEM key) — asymmetric RS*/ES* only; symmetric secrets are rejected (NAS signs asymmetrically).
+ - ``aud`` == ``expected_audience`` (this agent: ``agent:{instance_id}``).
+ - ``exp`` / ``nbf`` within ``leeway_seconds``.
+ - ``iss`` == ``issuer`` when an issuer is configured.
+ - ``purpose`` == ``"cron_fire"`` — so a general agent JWT can't be
+ replayed against the fire endpoint.
+
+ Returns None (never raises) on any failure, so the handler can answer 401
+ without leaking which check failed.
+ """
+ if not token or not expected_audience:
+ return None
+ if not jwks_or_key:
+ # No verification key configured → cannot verify → refuse. We never
+ # fall back to unsigned decode for a security boundary.
+ logger.warning("cron fire: no JWKS/key configured; refusing token")
+ return None
+
+ try:
+ import jwt
+ from jwt import PyJWKClient
+
+ # Resolve the signing key from the JWKS endpoint by the token's kid.
+ signing_key = None
+ if jwks_or_key.startswith("http://") or jwks_or_key.startswith("https://"):
+ jwk_client = PyJWKClient(jwks_or_key)
+ signing_key = jwk_client.get_signing_key_from_jwt(token).key
+ else:
+ # A PEM public key passed inline (test / pinned-key deployments).
+ signing_key = jwks_or_key
+
+ options = {"require": ["exp", "aud"]}
+ decode_kwargs: Dict[str, Any] = dict(
+ algorithms=["RS256", "RS384", "RS512", "ES256", "ES384"],
+ audience=expected_audience,
+ leeway=leeway_seconds,
+ options=options,
+ )
+ if issuer:
+ decode_kwargs["issuer"] = issuer
+
+ claims = jwt.decode(token, signing_key, **decode_kwargs)
+ except Exception as e:
+ logger.warning("cron fire: token verification failed: %s", e)
+ return None
+
+ if claims.get("purpose") != _FIRE_PURPOSE:
+ logger.warning("cron fire: token missing/!=%s purpose claim", _FIRE_PURPOSE)
+ return None
+
+ return claims
+
+
+def get_fire_verifier() -> Callable[..., Optional[Dict[str, Any]]]:
+ """Return the active inbound-fire verifier.
+
+ Default = the NAS-JWT verifier. The DQ-4 escape hatch (direct per-job
+ cron-key) would return a cron-key verifier here instead, selected by config
+ — so the webhook handler never changes when the auth mode is swapped.
+ """
+ return verify_nas_fire_token
diff --git a/tests/gateway/test_cron_fire_webhook.py b/tests/gateway/test_cron_fire_webhook.py
new file mode 100644
index 00000000000..e4aef243526
--- /dev/null
+++ b/tests/gateway/test_cron_fire_webhook.py
@@ -0,0 +1,152 @@
+"""Tests for the Chronos cron-fire webhook (POST /api/cron/fire) — Phase 4E.2.
+
+The webhook authenticates a NAS-minted JWT via the pluggable fire-verifier
+(NOT API_SERVER_KEY), then runs the job via the resolved provider's fire_due in
+the background, returning 202. These tests monkeypatch the verifier and
+resolve_cron_scheduler — the verifier itself is tested with real crypto in
+test_chronos_verify.py.
+"""
+
+import asyncio
+
+import pytest
+from aiohttp import web
+from aiohttp.test_utils import TestClient, TestServer
+
+from gateway.config import PlatformConfig
+from gateway.platforms.api_server import APIServerAdapter, cors_middleware
+
+_MOD = "gateway.platforms.api_server"
+
+
+def _make_adapter() -> APIServerAdapter:
+ return APIServerAdapter(PlatformConfig(enabled=True, extra={"key": "sk-secret"}))
+
+
+def _create_app(adapter: APIServerAdapter) -> web.Application:
+ app = web.Application(middlewares=[cors_middleware])
+ app["api_server_adapter"] = adapter
+ app.router.add_post("/api/cron/fire", adapter._handle_cron_fire)
+ return app
+
+
+@pytest.fixture
+def adapter():
+ return _make_adapter()
+
+
+class _SpyProvider:
+ """Records fire_due calls; stands in for the resolved provider."""
+
+ def __init__(self):
+ self.fired = []
+
+ def fire_due(self, job_id, *, adapters=None, loop=None):
+ self.fired.append(job_id)
+ return True
+
+
+@pytest.mark.asyncio
+async def test_valid_token_accepts_and_fires(adapter, monkeypatch):
+ """Valid NAS-JWT + {job_id} → 202 and fire_due invoked with that id."""
+ spy = _SpyProvider()
+ monkeypatch.setattr("cron.scheduler_provider.resolve_cron_scheduler", lambda: spy)
+ # verifier returns claims (valid token)
+ monkeypatch.setattr(
+ "plugins.cron.chronos.verify.get_fire_verifier",
+ lambda: (lambda **kw: {"purpose": "cron_fire", "aud": "agent:x"}),
+ )
+
+ app = _create_app(adapter)
+ async with TestClient(TestServer(app)) as cli:
+ resp = await cli.post("/api/cron/fire",
+ headers={"Authorization": "Bearer good"},
+ json={"job_id": "abc123"})
+ assert resp.status == 202
+ data = await resp.json()
+ assert data["job_id"] == "abc123"
+
+ # fire runs in a background thread/task — give it a beat to land.
+ for _ in range(50):
+ if spy.fired:
+ break
+ await asyncio.sleep(0.01)
+ assert spy.fired == ["abc123"]
+
+
+@pytest.mark.asyncio
+async def test_invalid_token_401_and_no_fire(adapter, monkeypatch):
+ """Bad/forged token → 401, fire_due NOT invoked."""
+ spy = _SpyProvider()
+ monkeypatch.setattr("cron.scheduler_provider.resolve_cron_scheduler", lambda: spy)
+ monkeypatch.setattr(
+ "plugins.cron.chronos.verify.get_fire_verifier",
+ lambda: (lambda **kw: None), # verification fails
+ )
+
+ app = _create_app(adapter)
+ async with TestClient(TestServer(app)) as cli:
+ resp = await cli.post("/api/cron/fire",
+ headers={"Authorization": "Bearer forged"},
+ json={"job_id": "abc123"})
+ assert resp.status == 401
+
+ await asyncio.sleep(0.05)
+ assert spy.fired == []
+
+
+@pytest.mark.asyncio
+async def test_missing_token_401(adapter, monkeypatch):
+ """No Authorization header → verifier gets empty token → 401."""
+ spy = _SpyProvider()
+ monkeypatch.setattr("cron.scheduler_provider.resolve_cron_scheduler", lambda: spy)
+ # Real verifier: empty token returns None.
+ app = _create_app(adapter)
+ async with TestClient(TestServer(app)) as cli:
+ resp = await cli.post("/api/cron/fire", json={"job_id": "abc123"})
+ assert resp.status == 401
+ assert spy.fired == []
+
+
+@pytest.mark.asyncio
+async def test_missing_job_id_400(adapter, monkeypatch):
+ """Valid token but no job_id → 400, no fire."""
+ spy = _SpyProvider()
+ monkeypatch.setattr("cron.scheduler_provider.resolve_cron_scheduler", lambda: spy)
+ monkeypatch.setattr(
+ "plugins.cron.chronos.verify.get_fire_verifier",
+ lambda: (lambda **kw: {"purpose": "cron_fire"}),
+ )
+
+ app = _create_app(adapter)
+ async with TestClient(TestServer(app)) as cli:
+ resp = await cli.post("/api/cron/fire",
+ headers={"Authorization": "Bearer good"},
+ json={})
+ assert resp.status == 400
+ assert spy.fired == []
+
+
+@pytest.mark.asyncio
+async def test_fire_does_not_require_api_server_key(adapter, monkeypatch):
+ """The fire endpoint must NOT gate on API_SERVER_KEY — auth is the NAS-JWT.
+ A request with NO API key header but a valid fire token still succeeds."""
+ spy = _SpyProvider()
+ monkeypatch.setattr("cron.scheduler_provider.resolve_cron_scheduler", lambda: spy)
+ monkeypatch.setattr(
+ "plugins.cron.chronos.verify.get_fire_verifier",
+ lambda: (lambda **kw: {"purpose": "cron_fire"}),
+ )
+
+ app = _create_app(adapter)
+ async with TestClient(TestServer(app)) as cli:
+ # Bearer is the FIRE token, not the API_SERVER_KEY "sk-secret".
+ resp = await cli.post("/api/cron/fire",
+ headers={"Authorization": "Bearer nas-jwt"},
+ json={"job_id": "j9"})
+ assert resp.status == 202
+ for _ in range(50):
+ if spy.fired:
+ break
+ await asyncio.sleep(0.01)
+ assert spy.fired == ["j9"]
diff --git a/tests/plugins/test_chronos_verify.py b/tests/plugins/test_chronos_verify.py
new file mode 100644
index 00000000000..1d9259f4eee
--- /dev/null
+++ b/tests/plugins/test_chronos_verify.py
@@ -0,0 +1,182 @@
+"""Tests for the Chronos inbound cron-fire JWT verifier (Phase 4E.1).
+
+These exercise REAL RS256 signing/verification (PyJWT[crypto] is a declared
+dependency) against an inline PEM public key — no mocking of the crypto, since
+this is a security boundary. The JWKS-URL path is covered separately by mocking
+PyJWKClient's key resolution.
+"""
+
+import time
+
+import pytest
+
+
+@pytest.fixture(scope="module")
+def rsa_keys():
+ """An RS256 keypair: (private_pem, public_pem)."""
+ from cryptography.hazmat.primitives import serialization
+ from cryptography.hazmat.primitives.asymmetric import rsa
+
+ key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
+ priv = key.private_bytes(
+ encoding=serialization.Encoding.PEM,
+ format=serialization.PrivateFormat.PKCS8,
+ encryption_algorithm=serialization.NoEncryption(),
+ ).decode()
+ pub = key.public_key().public_bytes(
+ encoding=serialization.Encoding.PEM,
+ format=serialization.PublicFormat.SubjectPublicKeyInfo,
+ ).decode()
+ return priv, pub
+
+
+def _mint(priv, claims):
+ import jwt
+ return jwt.encode(claims, priv, algorithm="RS256")
+
+
+AUD = "agent:inst-123"
+ISS = "https://portal.nousresearch.com"
+
+
+def _base_claims(**over):
+ now = int(time.time())
+ c = {
+ "aud": AUD,
+ "iss": ISS,
+ "purpose": "cron_fire",
+ "iat": now,
+ "nbf": now - 5,
+ "exp": now + 300,
+ }
+ c.update(over)
+ return c
+
+
+def test_valid_token_returns_claims(rsa_keys):
+ from plugins.cron.chronos.verify import verify_nas_fire_token
+
+ priv, pub = rsa_keys
+ token = _mint(priv, _base_claims())
+ claims = verify_nas_fire_token(token=token, expected_audience=AUD,
+ jwks_or_key=pub, issuer=ISS)
+ assert claims is not None
+ assert claims["purpose"] == "cron_fire"
+ assert claims["aud"] == AUD
+
+
+def test_wrong_audience_rejected(rsa_keys):
+ from plugins.cron.chronos.verify import verify_nas_fire_token
+
+ priv, pub = rsa_keys
+ token = _mint(priv, _base_claims(aud="agent:someone-else"))
+ assert verify_nas_fire_token(token=token, expected_audience=AUD,
+ jwks_or_key=pub, issuer=ISS) is None
+
+
+def test_missing_purpose_rejected(rsa_keys):
+ """A general agent JWT (no purpose=cron_fire) can't fire jobs."""
+ from plugins.cron.chronos.verify import verify_nas_fire_token
+
+ priv, pub = rsa_keys
+ claims = _base_claims()
+ del claims["purpose"]
+ token = _mint(priv, claims)
+ assert verify_nas_fire_token(token=token, expected_audience=AUD,
+ jwks_or_key=pub, issuer=ISS) is None
+
+
+def test_wrong_purpose_rejected(rsa_keys):
+ from plugins.cron.chronos.verify import verify_nas_fire_token
+
+ priv, pub = rsa_keys
+ token = _mint(priv, _base_claims(purpose="inference"))
+ assert verify_nas_fire_token(token=token, expected_audience=AUD,
+ jwks_or_key=pub, issuer=ISS) is None
+
+
+def test_expired_token_rejected(rsa_keys):
+ from plugins.cron.chronos.verify import verify_nas_fire_token
+
+ priv, pub = rsa_keys
+ now = int(time.time())
+ token = _mint(priv, _base_claims(iat=now - 1000, nbf=now - 1000, exp=now - 600))
+ assert verify_nas_fire_token(token=token, expected_audience=AUD,
+ jwks_or_key=pub, issuer=ISS) is None
+
+
+def test_wrong_issuer_rejected(rsa_keys):
+ from plugins.cron.chronos.verify import verify_nas_fire_token
+
+ priv, pub = rsa_keys
+ token = _mint(priv, _base_claims(iss="https://evil.example"))
+ assert verify_nas_fire_token(token=token, expected_audience=AUD,
+ jwks_or_key=pub, issuer=ISS) is None
+
+
+def test_tampered_signature_rejected(rsa_keys):
+ """A token signed by a DIFFERENT key must fail signature verification."""
+ from cryptography.hazmat.primitives import serialization
+ from cryptography.hazmat.primitives.asymmetric import rsa
+ from plugins.cron.chronos.verify import verify_nas_fire_token
+
+ _, pub = rsa_keys
+ attacker = rsa.generate_private_key(public_exponent=65537, key_size=2048)
+ attacker_priv = attacker.private_bytes(
+ encoding=serialization.Encoding.PEM,
+ format=serialization.PrivateFormat.PKCS8,
+ encryption_algorithm=serialization.NoEncryption(),
+ ).decode()
+ token = _mint(attacker_priv, _base_claims())
+ # Verified against the REAL public key → signature mismatch → None.
+ assert verify_nas_fire_token(token=token, expected_audience=AUD,
+ jwks_or_key=pub, issuer=ISS) is None
+
+
+def test_no_key_configured_refuses(rsa_keys):
+ """No JWKS/key configured → refuse (never fall back to unsigned decode)."""
+ from plugins.cron.chronos.verify import verify_nas_fire_token
+
+ priv, _ = rsa_keys
+ token = _mint(priv, _base_claims())
+ assert verify_nas_fire_token(token=token, expected_audience=AUD,
+ jwks_or_key=None) is None
+
+
+def test_empty_token_refused(rsa_keys):
+ from plugins.cron.chronos.verify import verify_nas_fire_token
+
+ _, pub = rsa_keys
+ assert verify_nas_fire_token(token="", expected_audience=AUD, jwks_or_key=pub) is None
+
+
+def test_jwks_url_path_resolves_key(rsa_keys, monkeypatch):
+ """The JWKS-URL branch resolves the signing key via PyJWKClient."""
+ from plugins.cron.chronos.verify import verify_nas_fire_token
+
+ priv, pub = rsa_keys
+ token = _mint(priv, _base_claims())
+
+ class FakeKey:
+ key = pub
+
+ class FakeJWKClient:
+ def __init__(self, url):
+ assert url == "https://portal.nousresearch.com/.well-known/jwks.json"
+
+ def get_signing_key_from_jwt(self, tok):
+ return FakeKey()
+
+ monkeypatch.setattr("jwt.PyJWKClient", FakeJWKClient)
+ claims = verify_nas_fire_token(
+ token=token, expected_audience=AUD,
+ jwks_or_key="https://portal.nousresearch.com/.well-known/jwks.json",
+ issuer=ISS,
+ )
+ assert claims is not None and claims["purpose"] == "cron_fire"
+
+
+def test_get_fire_verifier_returns_nas_verifier():
+ from plugins.cron.chronos.verify import get_fire_verifier, verify_nas_fire_token
+
+ assert get_fire_verifier() is verify_nas_fire_token
From b75757d4aa85e893d6e202c82a7c3392a57dee2e Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 18 Jun 2026 15:11:32 +1000
Subject: [PATCH 011/470] =?UTF-8?q?feat(cron):=20wire=20on=5Fjobs=5Fchange?=
=?UTF-8?q?d,=20cron.chronos=20config,=20docs=20+=20agent=E2=86=94NAS=20co?=
=?UTF-8?q?ntract?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Phase 4F (F.1 + F.2 + F.3, agent side). F.4 is the operator-run live smoke
(needs a NAS deployment); recorded in the PR, not code.
F.1 — on_jobs_changed wiring:
- cron/scheduler.py: _notify_provider_jobs_changed() — resolve the active
provider, call on_jobs_changed(), swallow errors. Lives in scheduler.py (not
jobs.py) so the store stays free of provider imports (no import cycle).
- Wired at the consumer surfaces AFTER a successful mutation: the cronjob model
tool (tools/cronjob_tools.py, create/update/remove/pause/resume) — which the
`hermes cron` CLI also routes through — and the REST handlers
(gateway/platforms/api_server.py, same five). Built-in's no-op default = zero
behavior change on the default path. Sleeping-agent direct jobs.json writes
(no tool/CLI/REST) are covered by reconcile-on-wake in start().
F.2 — config: cron.chronos.{portal_url,callback_url,expected_audience,
nas_jwks_url}. All non-secret; the agent holds no scheduler creds and the
outbound provision call reuses the existing Nous token (no token key). Additive
deep-merge key, no version literal.
F.3 — docs:
- docs/chronos-managed-cron-contract.md: authoritative agent↔NAS wire contract
(the three agent-cron endpoints + inbound /api/cron/fire + the 3-hop trust
model + at-most-once/re-arm semantics). This is what the NAS-side agent builds
against.
- cron-internals.md: "Managed cron (Chronos) for scale-to-zero" section.
- cli-commands.md: cron.provider accepts chronos + the cron.chronos.* keys.
- User docs name no scheduler vendor (QStash is a NAS-internal detail).
INVARIANT re-verified: zero qstash/upstash hits across plugins/cron, gateway,
hermes_cli, tools, website/docs (the one remaining repo hit is an unrelated
Context7 MCP comment in tools/mcp_tool.py).
Tests: test_jobs_changed_notify (5) — notify calls provider hook, swallows
errors, built-in harmless, tool create/remove notify. Full cron + chronos +
webhook + config + api_server_jobs suites green (504 in the cron+chronos+webhook
run).
---
cron/scheduler.py | 18 ++
docs/chronos-managed-cron-contract.md | 192 ++++++++++++++++++
gateway/platforms/api_server.py | 15 ++
hermes_cli/config.py | 19 ++
tests/cron/test_jobs_changed_notify.py | 101 +++++++++
tools/cronjob_tools.py | 15 ++
.../docs/developer-guide/cron-internals.md | 42 ++++
website/docs/reference/cli-commands.md | 12 +-
8 files changed, 409 insertions(+), 5 deletions(-)
create mode 100644 docs/chronos-managed-cron-contract.md
create mode 100644 tests/cron/test_jobs_changed_notify.py
diff --git a/cron/scheduler.py b/cron/scheduler.py
index 9bab59456ea..4f7940db0b1 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -2025,6 +2025,24 @@ def run_one_job(job: dict, *, adapters=None, loop=None, verbose: bool = False) -
return False
+def _notify_provider_jobs_changed() -> None:
+ """Best-effort: tell the active scheduler provider the job set changed.
+
+ Called by the consumer surfaces (model tool / CLI / REST) AFTER a
+ successful store mutation (create/update/remove/pause/resume) so an external
+ provider (Chronos) can re-provision/cancel the affected one-shot via NAS.
+ No-op for the built-in (it re-reads jobs.json each tick), so the default
+ path is unchanged. Lives here (not in cron/jobs.py) to keep the store free
+ of provider imports — avoids an import cycle and keeps jobs.py low-coupling.
+ Never raises into the caller.
+ """
+ try:
+ from cron.scheduler_provider import resolve_cron_scheduler
+ resolve_cron_scheduler().on_jobs_changed()
+ except Exception as e:
+ logger.debug("on_jobs_changed notify failed: %s", e)
+
+
def tick(verbose: bool = True, adapters=None, loop=None, sync: bool = True) -> int:
"""
Check and run all due jobs.
diff --git a/docs/chronos-managed-cron-contract.md b/docs/chronos-managed-cron-contract.md
new file mode 100644
index 00000000000..0848d5eb939
--- /dev/null
+++ b/docs/chronos-managed-cron-contract.md
@@ -0,0 +1,192 @@
+# Chronos managed-cron — agent ↔ NAS wire contract
+
+**Status:** authoritative wire spec for the Chronos cron provider.
+**Audience:** the NAS-side implementer of the `agent-cron` endpoints
+(`nous-account-service`) and anyone debugging the managed-cron path.
+
+Chronos lets a hosted Hermes gateway **scale to zero** while idle and still
+fire cron jobs. Instead of an in-process 60-second ticker, the agent asks NAS
+to arm exactly **one external one-shot per job at that job's real next-fire
+time**. NAS calls the agent back at fire time over an authenticated webhook;
+the agent runs the job and re-arms the next one-shot. Between fires the agent
+process can be fully stopped — it wakes only on a genuine fire.
+
+The external scheduler NAS uses to implement the one-shots is an **internal NAS
+implementation detail**. The agent never talks to it, never holds its
+credentials, and never names it. The agent only knows the three NAS endpoints
+below.
+
+```
+create/update/pause/resume/remove a cron job (agent side)
+ │
+ ▼
+ChronosCronScheduler.reconcile() ── agent computes next_run_at
+ │ POST {portal}/api/agent-cron/provision (auth: agent's Nous access token)
+ ▼
+NAS arms a one-shot for fire_at ── NAS owns the scheduler + its creds
+ │
+ ⏰ at fire_at
+ ▼
+scheduler → POST {portal}/api/agent-cron/relay (auth: scheduler signature, NAS-verified)
+ │
+ ▼
+NAS mints a short-lived agent-audience JWT (purpose=cron_fire)
+ │ POST {agent_callback_url}/api/cron/fire (auth: that JWT)
+ ▼
+agent verifies the NAS JWT → store CAS claim → run_one_job → re-arm next one-shot
+```
+
+## Trust model (read this first)
+
+| Hop | Who calls whom | Auth mechanism | Verified by |
+|---|---|---|---|
+| 1 | agent → NAS (`provision`/`cancel`/`list`) | the agent's existing **Nous Portal access token** (Bearer) | NAS (its normal agent-token path) |
+| 2 | scheduler → NAS (`relay`) | the scheduler's request **signature** | NAS (the signature path it already has) |
+| 3 | NAS → agent (`/api/cron/fire`) | a **short-lived NAS-minted JWT** (`aud=agent:{instance_id}`, `purpose=cron_fire`) | agent (PyJWT against NAS JWKS) |
+
+Why NAS-mediated rather than scheduler→agent direct: the scheduler signs with
+**NAS's** keys, which the agent does not (and should not) hold. The agent can
+only verify a **NAS-minted** token — a trust path it already has. This keeps
+all scheduler credentials inside NAS. (Full rationale: the plan's DQ-4.)
+
+No new secret is introduced on the agent: hop 1 reuses the token the agent
+already uses for the portal, and hop 3 reuses the NAS-JWT verification the agent
+already performs.
+
+---
+
+## Endpoint 1 — `POST /api/agent-cron/provision` (agent → NAS)
+
+Arm (or re-arm, idempotently) exactly one one-shot for a job.
+
+- **Auth:** `Authorization: Bearer <agent's Nous Portal access token>`. NAS validates via
+ its normal agent-token path and scopes the row to the calling agent/org.
+- **Request body:**
+ ```json
+ {
+ "job_id": "ab12cd34",
+ "fire_at": "2026-06-18T12:34:56+00:00",
+ "agent_callback_url": "https://agent-xyz.fly.dev",
+ "dedup_key": "ab12cd34:2026-06-18T12:34:56+00:00"
+ }
+ ```
+ - `fire_at` — ISO 8601, **agent-computed**. May be sub-minute in the future;
+ NAS must honor second-granularity (the agent owns the time, so there is no
+ 1-minute scheduler floor).
+ - `agent_callback_url` — the agent's own publicly-reachable base URL. NAS
+ POSTs `{agent_callback_url}/api/cron/fire` at fire time.
+ - `dedup_key` — `"{job_id}:{fire_at}"`. NAS **upserts by `(agent_id, job_id)`**
+ so re-arming the same fire is idempotent (no duplicate one-shots). A new
+ `fire_at` for the same `job_id` replaces the prior arm.
+- **Action:** arm one one-shot to fire at `fire_at`, destined for the NAS
+ **relay** route (Endpoint 3) — NOT the agent directly, so NAS stays in the
+ loop to mint the agent JWT. Persist `(agent_id, job_id, schedule_id,
+ agent_callback_url)`.
+- **Response:** `200 {"schedule_id": "<schedule_id>"}`.
+
+## Endpoint 2 — `POST /api/agent-cron/cancel` (agent → NAS)
+
+- **Auth:** same as Endpoint 1.
+- **Body:** `{"job_id": "ab12cd34"}`.
+- **Action:** cancel the armed one-shot for `(agent_id, job_id)` and delete the
+ row. Idempotent — cancelling an unknown job is a 200 no-op.
+- **Response:** `200 {"ok": true}`.
+
+## Endpoint 3 — `POST /api/agent-cron/relay` (scheduler → NAS, the fire relay)
+
+- **Auth:** the scheduler's request **signature**, verified by NAS with the
+ signature path it already has. This is the trust boundary for the fire — a
+ forged relay call must be rejected here.
+- **Action:**
+ 1. Look up `(agent_id, job_id) → agent_callback_url` from the persisted row.
+ 2. Mint a **short-lived** JWT: `aud = "agent:{instance_id}"`,
+ `iss = {portal_url}`, `purpose = "cron_fire"`, small `exp` (≈60–120s),
+ signed with NAS's normal asymmetric signing key (published via JWKS).
+ 3. `POST {agent_callback_url}/api/cron/fire` with
+ `Authorization: Bearer <jwt>` and body `{"job_id": "...", "fire_at": "..."}`.
+ 4. Treat a non-2xx agent response as a **retryable** failure (let the
+ scheduler retry the relay). The agent's store CAS de-dupes a double fire,
+ so retries are safe.
+- **Response to the scheduler:** 2xx once the agent POST is accepted (202), so
+ the scheduler does not retry a delivered fire.
+
+---
+
+## Inbound `POST /api/cron/fire` (NAS → agent) — agent side, already implemented
+
+This is the agent endpoint NAS calls in Endpoint 3 step 3. Implemented on the
+`APIServerAdapter` (`gateway/platforms/api_server.py`); the verifier is
+`plugins/cron/chronos/verify.py`.
+
+- **Auth:** `Authorization: Bearer <NAS-minted JWT>`. The agent verifies:
+ - signature against the NAS JWKS (`cron.chronos.nas_jwks_url`),
+ - `aud` == `cron.chronos.expected_audience` (this agent's
+ `agent:{instance_id}`),
+ - `iss` == `cron.chronos.portal_url`,
+ - `exp` / `nbf` (30s leeway),
+ - `purpose == "cron_fire"` — a general agent JWT (no/other purpose) is
+ rejected so it can't be replayed against this endpoint.
+- **Body:** `{"job_id": "ab12cd34", "fire_at": "..."}` (only `job_id` is used).
+- **Behavior:**
+ - invalid/missing/forged/expired/wrong-aud/wrong-purpose token → **401**, no
+ execution.
+ - missing `job_id` → **400**.
+ - valid → **202 `{"status": "accepted", "job_id": "..."}`** immediately, and
+ the job runs in the background. 202-before-run means a long agent turn never
+ trips the relay's HTTP timeout.
+- **At-most-once:** the agent claims the job with a store-level compare-and-set
+ (`claim_job_for_fire`) before running. A relay/scheduler retry that arrives
+ while the first fire is in flight (or after it completed) loses the claim and
+ does not double-run.
+
+---
+
+## At-most-once & re-arm semantics
+
+- **Recurring (cron/interval):** on fire, the agent advances `next_run_at`
+ (under its store lock) as part of the claim, runs the job, then re-provisions
+ a one-shot for the new `next_run_at`. A duplicate relay for the old `fire_at`
+ finds the claim taken / time advanced and is dropped.
+- **One-shot (`30m`, `+90s`, etc.):** fires once; `mark_job_run` marks it
+ completed. No re-arm.
+- **`repeat.times = N`:** `mark_job_run` deletes the job at the limit, so
+ `get_job` returns `None` after the final fire → the agent does **not** re-arm
+ → the schedule stops cleanly with no orphaned one-shot.
+- **Multi-replica agents:** the store CAS makes the fire at-most-once across N
+ gateway replicas sharing one `HERMES_HOME` — exactly one replica runs each
+ fire.
+
+## Reconcile (self-healing)
+
+The agent reconciles desired (`jobs.json`) vs armed on:
+- `start()` (gateway boot / wake),
+- every successful job mutation (`on_jobs_changed`),
+- piggybacked after each fire (re-arm).
+
+Reconcile arms missing/changed-time jobs and cancels orphans. A missed
+provision (transient NAS error) self-heals on the next reconcile. There is **no
+periodic wake** of a sleeping agent — that would negate scale-to-zero.
+
+## Config (agent side)
+
+All non-secret (`cron.chronos.*` in `config.yaml`); the agent holds no scheduler
+credentials. For hosted agents NAS sets these at provision time:
+
+| key | meaning |
+|---|---|
+| `cron.provider` | `"chronos"` to activate (empty = built-in ticker) |
+| `cron.chronos.portal_url` | NAS base URL (also the expected JWT `iss`) |
+| `cron.chronos.callback_url` | the agent's own public base URL for NAS→agent fires |
+| `cron.chronos.expected_audience` | this agent's JWT `aud` (`agent:{instance_id}`) |
+| `cron.chronos.nas_jwks_url` | NAS JWKS for verifying the fire JWT |
+
+If `callback_url` / `portal_url` is blank or the agent has no Nous login,
+`is_available()` returns False and the resolver falls back to the built-in
+in-process ticker — cron never loses its trigger.
+
+## Escape hatch (not default)
+
+The inbound `/api/cron/fire` verifier is pluggable (`get_fire_verifier()`). If
+relay volume through NAS ever saturates, a direct scheduler→agent mode with a
+per-job NAS-minted cron-key can replace the NAS-JWT verifier with **no change to
+the webhook handler**. NAS-mediated (this contract) is the default.
diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py
index c657f4b4c6d..f7e1ba42f85 100644
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@@ -717,6 +717,16 @@ except ImportError:
_cron_resume = None
_cron_trigger = None
+
+def _notify_cron_provider_jobs_changed() -> None:
+ """Tell the active cron scheduler provider the job set changed after a REST
+ mutation (no-op for the built-in). Best-effort — never breaks the handler."""
+ try:
+ from cron.scheduler import _notify_provider_jobs_changed
+ _notify_provider_jobs_changed()
+ except Exception:
+ pass
+
# Defense-in-depth: mirror the agent-facing cronjob tool, which scans the
# user-supplied prompt for exfiltration/injection payloads at create/update
# time (tools/cronjob_tools.py). The REST cron endpoints are authenticated
@@ -3206,6 +3216,7 @@ class APIServerAdapter(BasePlatformAdapter):
kwargs["repeat"] = repeat
job = _cron_create(**kwargs)
+ _notify_cron_provider_jobs_changed()
return web.json_response({"job": job})
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
@@ -3262,6 +3273,7 @@ class APIServerAdapter(BasePlatformAdapter):
job = _cron_update(job_id, sanitized)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
+ _notify_cron_provider_jobs_changed()
return web.json_response({"job": job})
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
@@ -3281,6 +3293,7 @@ class APIServerAdapter(BasePlatformAdapter):
success = _cron_remove(job_id)
if not success:
return web.json_response({"error": "Job not found"}, status=404)
+ _notify_cron_provider_jobs_changed()
return web.json_response({"ok": True})
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
@@ -3300,6 +3313,7 @@ class APIServerAdapter(BasePlatformAdapter):
job = _cron_pause(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
+ _notify_cron_provider_jobs_changed()
return web.json_response({"job": job})
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
@@ -3319,6 +3333,7 @@ class APIServerAdapter(BasePlatformAdapter):
job = _cron_resume(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
+ _notify_cron_provider_jobs_changed()
return web.json_response({"job": job})
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index d53393ac432..79f56be5d2e 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -2132,6 +2132,25 @@ DEFAULT_CONFIG = {
# An unknown or unavailable provider falls back to the built-in, so cron
# never loses its trigger.
"provider": "",
+ # Chronos (NAS-mediated managed cron) settings. Only consulted when
+ # provider == "chronos". All non-secret (URLs + the JWT audience): the
+ # agent holds NO external-scheduler credentials. For hosted agents, NAS
+ # sets these at provision time. The outbound provision call reuses the
+ # agent's existing Nous Portal token — there is no token key here.
+ "chronos": {
+ # NAS / portal base URL the agent calls to arm/cancel one-shots
+ # and that mints the inbound fire JWT (used as the expected issuer).
+ "portal_url": "https://portal.nousresearch.com",
+ # The agent's OWN publicly-reachable base URL for NAS→agent fires
+ # (NAS POSTs {callback_url}/api/cron/fire). Empty → Chronos is
+ # unavailable and the resolver falls back to the built-in ticker.
+ "callback_url": "",
+ # This agent's expected JWT audience (e.g. "agent:{instance_id}").
+ "expected_audience": "",
+ # NAS JWKS URL for verifying the inbound fire JWT's signature.
+ # Empty → the fire endpoint refuses all tokens (no unsigned decode).
+ "nas_jwks_url": "",
+ },
# Wrap delivered cron responses with a header (task name) and footer
# ("The agent cannot see this message"). Set to false for clean output.
"wrap_response": True,
diff --git a/tests/cron/test_jobs_changed_notify.py b/tests/cron/test_jobs_changed_notify.py
new file mode 100644
index 00000000000..eed875186b4
--- /dev/null
+++ b/tests/cron/test_jobs_changed_notify.py
@@ -0,0 +1,101 @@
+"""Tests for on_jobs_changed wiring (Phase 4F.1).
+
+After a store mutation via the consumer surfaces (model tool / CLI / REST), the
+active scheduler provider's on_jobs_changed() must be invoked so an external
+provider (Chronos) re-provisions/cancels. The built-in's no-op default means
+the default path is unchanged.
+"""
+
+import pytest
+
+
+@pytest.fixture
+def temp_home(tmp_path, monkeypatch):
+ monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+ yield tmp_path
+
+
+def test_notify_helper_calls_provider_on_jobs_changed(monkeypatch):
+ """cron.scheduler._notify_provider_jobs_changed resolves the provider and
+ calls on_jobs_changed exactly once."""
+ import cron.scheduler_provider as sp
+ import cron.scheduler as sched
+
+ calls = []
+
+ class Spy(sp.CronScheduler):
+ @property
+ def name(self):
+ return "spy"
+
+ def start(self, stop_event, **kw):
+ pass
+
+ def on_jobs_changed(self):
+ calls.append(1)
+
+ monkeypatch.setattr(sp, "resolve_cron_scheduler", lambda: Spy())
+ sched._notify_provider_jobs_changed()
+ assert calls == [1]
+
+
+def test_notify_helper_swallows_provider_errors(monkeypatch):
+ """A provider that raises in on_jobs_changed must not propagate into the
+ caller (best-effort notify)."""
+ import cron.scheduler_provider as sp
+ import cron.scheduler as sched
+
+ class Boom(sp.CronScheduler):
+ @property
+ def name(self):
+ return "boom"
+
+ def start(self, stop_event, **kw):
+ pass
+
+ def on_jobs_changed(self):
+ raise RuntimeError("kaboom")
+
+ monkeypatch.setattr(sp, "resolve_cron_scheduler", lambda: Boom())
+ sched._notify_provider_jobs_changed() # must not raise
+
+
+def test_builtin_notify_is_harmless(monkeypatch):
+ """With the built-in provider (default), notify is a no-op and never
+ raises."""
+ import cron.scheduler as sched
+ # default resolution → built-in; just assert it doesn't blow up.
+ sched._notify_provider_jobs_changed()
+
+
+def test_tool_create_notifies_provider(temp_home, monkeypatch):
+ """Creating a job via the cronjob tool path invokes on_jobs_changed."""
+ import cron.scheduler as sched
+ calls = []
+ monkeypatch.setattr(sched, "_notify_provider_jobs_changed",
+ lambda: calls.append("changed"))
+
+ from tools.cronjob_tools import cronjob
+ import json
+
+ out = json.loads(cronjob(action="create", prompt="echo hi", schedule="every 5m", name="w"))
+ assert out["success"] is True
+ assert calls == ["changed"]
+
+
+def test_tool_remove_notifies_provider(temp_home, monkeypatch):
+ """Removing a job via the tool path invokes on_jobs_changed."""
+ import json
+ from tools.cronjob_tools import cronjob
+
+ created = json.loads(cronjob(action="create", prompt="x", schedule="every 5m", name="r"))
+ jid = created["job_id"]
+
+ import cron.scheduler as sched
+ calls = []
+ monkeypatch.setattr(sched, "_notify_provider_jobs_changed",
+ lambda: calls.append("changed"))
+
+ out = json.loads(cronjob(action="remove", job_id=jid))
+ assert out["success"] is True
+ assert calls == ["changed"]
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index 7ec31b806c4..0bd62b2fc37 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -33,6 +33,16 @@ from cron.jobs import (
)
+def _notify_provider_jobs_changed_safe() -> None:
+ """Tell the active cron scheduler provider the job set changed (no-op for
+ the built-in). Best-effort — never lets a provider error break the tool."""
+ try:
+ from cron.scheduler import _notify_provider_jobs_changed
+ _notify_provider_jobs_changed()
+ except Exception:
+ pass
+
+
# ---------------------------------------------------------------------------
# Cron prompt scanning
# ---------------------------------------------------------------------------
@@ -549,6 +559,7 @@ def cronjob(
workdir=_normalize_optional_job_value(workdir),
no_agent=_no_agent,
)
+ _notify_provider_jobs_changed_safe()
return json.dumps(
{
"success": True,
@@ -604,6 +615,7 @@ def cronjob(
removed = remove_job(job_id)
if not removed:
return tool_error(f"Failed to remove job '{job_id}'", success=False)
+ _notify_provider_jobs_changed_safe()
return json.dumps(
{
"success": True,
@@ -619,10 +631,12 @@ def cronjob(
if normalized == "pause":
updated = pause_job(job_id, reason=reason)
+ _notify_provider_jobs_changed_safe()
return json.dumps({"success": True, "job": _format_job(updated)}, indent=2)
if normalized == "resume":
updated = resume_job(job_id)
+ _notify_provider_jobs_changed_safe()
return json.dumps({"success": True, "job": _format_job(updated)}, indent=2)
if normalized in {"run", "run_now", "trigger"}:
@@ -711,6 +725,7 @@ def cronjob(
if not updates:
return tool_error("No updates provided.", success=False)
updated = update_job(job_id, updates)
+ _notify_provider_jobs_changed_safe()
return json.dumps({"success": True, "job": _format_job(updated)}, indent=2)
return tool_error(f"Unknown cron action '{action}'", success=False)
diff --git a/website/docs/developer-guide/cron-internals.md b/website/docs/developer-guide/cron-internals.md
index c895d339b09..386302554d7 100644
--- a/website/docs/developer-guide/cron-internals.md
+++ b/website/docs/developer-guide/cron-internals.md
@@ -129,6 +129,48 @@ A provider only controls the trigger, never execution.
In CLI mode, cron jobs only fire when `hermes cron` commands are run or during active CLI sessions.
+### Managed cron (Chronos) for scale-to-zero
+
+Hosted gateways can run the **Chronos** provider (`cron.provider: chronos`)
+instead of the built-in ticker. Chronos lets an idle gateway **scale to zero**
+and still fire cron jobs: rather than a 60-second in-process loop (which would
+keep the process awake), it asks Nous infrastructure to arm exactly **one
+managed one-shot per job at that job's real next-fire time**. At fire time Nous
+calls the gateway back over an authenticated webhook (`POST /api/cron/fire`);
+the gateway runs the job through the same `run_one_job` path as the built-in,
+then re-arms the next one-shot. Between fires the process can be fully stopped —
+it wakes only on a genuine fire, never on a periodic timer.
+
+The flow (the managed scheduler is provided by Nous; the agent holds no
+scheduler credentials):
+
+```
+create/update a cron job
+ → Chronos asks Nous to arm a one-shot at the job's next_run_at
+ (authenticated with the agent's existing Nous token)
+ → at fire time Nous calls the gateway: POST {callback_url}/api/cron/fire
+ (authenticated with a short-lived, purpose-scoped Nous-minted JWT)
+ → the gateway verifies the token, claims the job (store compare-and-set so
+ multi-replica deployments fire at-most-once), runs it, and re-arms the next
+ one-shot
+```
+
+Config (all non-secret; on hosted agents Nous sets these at provision time):
+
+| key | meaning |
+|---|---|
+| `cron.provider` | `chronos` to activate (empty = built-in ticker) |
+| `cron.chronos.portal_url` | Nous base URL (arming + the fire-token issuer) |
+| `cron.chronos.callback_url` | the gateway's own public base URL for inbound fires |
+| `cron.chronos.expected_audience` | this agent's fire-token audience |
+| `cron.chronos.nas_jwks_url` | key set for verifying the inbound fire token |
+
+If Chronos is misconfigured or the agent isn't logged into Nous,
+`resolve_cron_scheduler()` falls back to the built-in ticker (logged warning) —
+cron never loses its trigger. Recurring jobs re-arm after each fire; `repeat`-N
+jobs stop cleanly when the count is exhausted (no orphaned one-shot). The full
+agent↔Nous wire contract lives in `docs/chronos-managed-cron-contract.md`.
+
### Fresh Session Isolation
Each cron job runs in a completely fresh agent session:
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index f0fe67d4349..0cf004f1a0c 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -534,11 +534,13 @@ hermes cron
| `tick` | Run due jobs once and exit. |
The cron **trigger** is pluggable via the `cron.provider` config key. Empty
-(the default) uses the built-in in-process ticker. A named provider (e.g.
-`chronos`, a managed-cron provider for scale-to-zero deployments) is discovered
-from `plugins/cron/<name>/` or `$HERMES_HOME/plugins/<name>/`; an unknown or
-unavailable provider falls back to the built-in, so cron is never left without
-a trigger. See the [cron internals](../developer-guide/cron-internals.md#gateway-integration) doc.
+(the default) uses the built-in in-process ticker. Set it to `chronos` (the
+NAS-managed provider for scale-to-zero hosted gateways) — configured via the
+`cron.chronos.*` keys (`portal_url`, `callback_url`, `expected_audience`,
+`nas_jwks_url`) — or name a custom provider under `plugins/cron/<name>/` or
+`$HERMES_HOME/plugins/<name>/`. An unknown or unavailable provider falls back to
+the built-in, so cron is never left without a trigger. See the
+[cron internals](../developer-guide/cron-internals.md#gateway-integration) doc.
## `hermes kanban`
From 6752da9a7735add1aff6ebc632c7e83fc4005a48 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Thu, 18 Jun 2026 11:32:18 +0530
Subject: [PATCH 012/470] fix(dashboard): clean up upload temp file on client
disconnect + pin python-multipart (NS-501)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Follow-up to #47663 (streaming multipart upload), fixing two issues that
landed with it.
1. Temp file leaked on client disconnect. The streaming upload endpoint's
except chain caught only HTTPException / PermissionError / OSError — all
Exception subclasses. asyncio.CancelledError, raised when a browser aborts
a large upload mid-stream (the exact NS-501 scenario), is a BaseException,
so it bypassed every except clause and reached a finally that only closed
the file handle and never unlinked the temp file. Every aborted large
upload orphaned a partial `.{name}.*.upload` file (up to ~100 MB) in the
target directory. Cleanup now lives in finally, keyed on a `renamed`
success flag, so the temp file is removed on every non-success exit
including BaseException paths. Added test_stream_upload_cleans_temp_on_cancellation,
which fails on the pre-fix code (leaks the temp file) and passes with the fix.
2. python-multipart pinned to ==0.0.27 instead of ==0.0.20. The package was
already resolved at 0.0.27 transitively (via daytona) before #47663; the
explicit ==0.0.20 pin in the [web] extra and the tool.dashboard lazy-install
set downgraded it. Bumped both to ==0.0.27 and regenerated with `uv lock`,
keeping the lockfile coherent. The base dependency stays >=0.0.9,<1.
---
hermes_cli/web_server.py | 12 ++++--
pyproject.toml | 2 +-
tests/hermes_cli/test_web_server_files.py | 52 +++++++++++++++++++++++
tools/lazy_deps.py | 2 +-
uv.lock | 8 ++--
5 files changed, 67 insertions(+), 9 deletions(-)
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index ed619979bfb..ad82d9fdfef 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -1529,6 +1529,7 @@ async def upload_managed_file_stream(
)
tmp_path = Path(tmp_name)
total = 0
+ renamed = False
try:
with os.fdopen(tmp_fd, "wb") as out:
while True:
@@ -1540,16 +1541,21 @@ async def upload_managed_file_stream(
raise HTTPException(status_code=413, detail="File is too large")
out.write(chunk)
os.replace(tmp_path, target)
+ renamed = True
except HTTPException:
- tmp_path.unlink(missing_ok=True)
raise
except PermissionError:
- tmp_path.unlink(missing_ok=True)
raise HTTPException(status_code=403, detail="File is not writable")
except OSError as exc:
- tmp_path.unlink(missing_ok=True)
raise HTTPException(status_code=500, detail=f"Could not write file: {exc}")
finally:
+ # Clean up the temp file on every non-success exit, including
+ # BaseException paths the `except` clauses above don't catch — most
+ # importantly asyncio.CancelledError when a browser aborts a large
+ # upload mid-stream (the exact NS-501 scenario). os.replace clears
+ # tmp_path on success, so only unlink when the rename didn't happen.
+ if not renamed:
+ tmp_path.unlink(missing_ok=True)
await file.close()
return {
diff --git a/pyproject.toml b/pyproject.toml
index 6e371126dd2..cab849dc755 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -258,7 +258,7 @@ youtube = [
# `hermes dashboard` (localhost SPA + API). Not in core to keep the default install lean.
# starlette==1.0.1 pinned for CVE-2026-48710 (BadHost) — fastapi pulls Starlette
# transitively and pre-1.0.1 is the vulnerable range. See the mcp extra above.
-web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0", "starlette==1.0.1", "python-multipart==0.0.20"]
+web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0", "starlette==1.0.1", "python-multipart==0.0.27"]
all = [
# Policy (2026-05-12): `[all]` includes only extras that genuinely
# CAN'T be lazy-installed via `tools/lazy_deps.py` — i.e. things every
diff --git a/tests/hermes_cli/test_web_server_files.py b/tests/hermes_cli/test_web_server_files.py
index 46ba18b1355..b295f0ab998 100644
--- a/tests/hermes_cli/test_web_server_files.py
+++ b/tests/hermes_cli/test_web_server_files.py
@@ -436,3 +436,55 @@ def test_stream_upload_large_file_under_cap_succeeds(forced_files_client, monkey
assert created.status_code == 200
assert file_path.stat().st_size == len(payload)
assert file_path.read_bytes() == payload
+
+
+def test_stream_upload_cleans_temp_on_cancellation(forced_files_client):
+ """A client disconnect mid-stream (asyncio.CancelledError) must not leak a temp file.
+
+ CancelledError is a BaseException, not an Exception, so it bypasses the
+ endpoint's ``except`` clauses entirely. The cleanup therefore lives in a
+ ``finally`` keyed on a success flag — without it, every aborted large
+ upload (the exact NS-501 scenario) would orphan a partial ``.upload`` temp
+ file in the target directory. We invoke the endpoint coroutine directly so
+ the BaseException propagates instead of being swallowed by the test client.
+ """
+ import asyncio
+
+ _client, root = forced_files_client
+ target = root / "out" / "aborted.bin"
+ target.parent.mkdir(parents=True, exist_ok=True)
+
+ class _AbortingUpload:
+ """UploadFile stand-in that yields one chunk then aborts like a dropped client."""
+
+ filename = "aborted.bin"
+
+ def __init__(self):
+ self._calls = 0
+
+ async def read(self, _size):
+ self._calls += 1
+ if self._calls == 1:
+ return b"partial chunk before the client vanished"
+ raise asyncio.CancelledError()
+
+ async def close(self):
+ return None
+
+ request = SimpleNamespace()
+
+ with pytest.raises(asyncio.CancelledError):
+ asyncio.run(
+ web_server.upload_managed_file_stream(
+ request=request,
+ file=_AbortingUpload(),
+ path=str(target),
+ overwrite=True,
+ )
+ )
+
+ # No partial data was promoted into place ...
+ assert not target.exists()
+ # ... and no .upload temp file was left behind.
+ leftovers = [p.name for p in target.parent.iterdir() if ".upload" in p.name]
+ assert leftovers == [], f"temp upload files leaked on cancellation: {leftovers}"
diff --git a/tools/lazy_deps.py b/tools/lazy_deps.py
index 98bacbf42a0..4e2159a1a02 100644
--- a/tools/lazy_deps.py
+++ b/tools/lazy_deps.py
@@ -178,7 +178,7 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
"fastapi==0.133.1",
"uvicorn[standard]==0.41.0",
"starlette==1.0.1", # CVE-2026-48710 (BadHost) — keep lazy-install in sync with pyproject [web]
- "python-multipart==0.0.20", # FastAPI UploadFile/Form for streaming uploads (NS-501)
+ "python-multipart==0.0.27", # FastAPI UploadFile/Form for streaming uploads (NS-501)
),
# Vision image-resize recovery (Pillow). Pillow is now a CORE dependency
# (pyproject `dependencies`), so this entry is a belt-and-suspenders fallback
diff --git a/uv.lock b/uv.lock
index fc340bdbe89..095b7563311 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1713,7 +1713,7 @@ requires-dist = [
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.3.0" },
{ name = "python-dotenv", specifier = "==1.2.2" },
{ name = "python-multipart", specifier = ">=0.0.9,<1" },
- { name = "python-multipart", marker = "extra == 'web'", specifier = "==0.0.20" },
+ { name = "python-multipart", marker = "extra == 'web'", specifier = "==0.0.27" },
{ name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'messaging'", specifier = "==22.6" },
{ name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'termux'", specifier = "==22.6" },
{ name = "pywinpty", marker = "sys_platform == 'win32'", specifier = ">=2.0.0,<3" },
@@ -3317,11 +3317,11 @@ wheels = [
[[package]]
name = "python-multipart"
-version = "0.0.20"
+version = "0.0.27"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158, upload-time = "2024-12-16T19:45:46.972Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/69/9b/f23807317a113dc36e74e75eb265a02dd1a4d9082abc3c1064acd22997c4/python_multipart-0.0.27.tar.gz", hash = "sha256:9870a6a8c5a20a5bf4f07c017bd1489006ff8836cff097b6933355ee2b49b602", size = 44043, upload-time = "2026-04-27T10:51:26.649Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546, upload-time = "2024-12-16T19:45:44.423Z" },
+ { url = "https://files.pythonhosted.org/packages/99/78/4126abcbdbd3c559d43e0db7f7b9173fc6befe45d39a2856cc0b8ec2a5a6/python_multipart-0.0.27-py3-none-any.whl", hash = "sha256:6fccfad17a27334bd0193681b369f476eda3409f17381a2d65aa7df3f7275645", size = 29254, upload-time = "2026-04-27T10:51:24.997Z" },
]
[[package]]
From b892ee2bcf1b65f3010c7229f4d61e574ada54ad Mon Sep 17 00:00:00 2001
From: xxxigm
Date: Tue, 16 Jun 2026 21:20:14 +0700
Subject: [PATCH 013/470] fix(agent): summarize non-retryable API errors so raw
HTML never leaks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When a non-retryable client error aborts the turn (e.g. a Codex/Cloudflare
HTTP 403 "managed challenge" page), the conversation loop returned the
failure dict with `error: str(api_error)` — the entire ~60KB HTML page.
Downstream consumers deliver that field verbatim: a cron job dumped a
Cloudflare challenge page to Discord, where it was split into ~31 messages.
The sibling "max retries exhausted" path already collapses such bodies via
`_summarize_api_error` (which extracts the / status from HTML error
pages). This makes the non-retryable path consistent: compute the summary
once and use it for both the status emit and the returned `error`.
---
agent/conversation_loop.py | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py
index ef69ac68329..163a508a8cd 100644
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -3197,15 +3197,22 @@ def run_conversation(
# Terminal — flush buffered context so the user sees
# what was tried before the abort.
agent._flush_status_buffer()
+ # Summarize once: Cloudflare/proxy HTML challenge pages and
+ # other raw provider bodies must be collapsed to a short
+ # one-liner here, otherwise the full page leaks into the
+ # returned ``error`` field and downstream consumers deliver
+ # it verbatim (e.g. a cron failure notification dumped a
+ # ~60KB Cloudflare challenge page as 31 Discord messages).
+ _nonretryable_summary = agent._summarize_api_error(api_error)
if classified.reason == FailoverReason.content_policy_blocked:
agent._emit_status(
f"❌ Provider safety filter blocked this request: "
- f"{agent._summarize_api_error(api_error)}"
+ f"{_nonretryable_summary}"
)
else:
agent._emit_status(
f"❌ Non-retryable error (HTTP {status_code}): "
- f"{agent._summarize_api_error(api_error)}"
+ f"{_nonretryable_summary}"
)
agent._vprint(f"{agent.log_prefix}❌ Non-retryable client error (HTTP {status_code}). Aborting.", force=True)
agent._vprint(f"{agent.log_prefix} 🔌 Provider: {_provider} Model: {_model}", force=True)
@@ -3309,7 +3316,7 @@ def run_conversation(
"api_calls": api_call_count,
"completed": False,
"failed": True,
- "error": str(api_error),
+ "error": _nonretryable_summary,
}
if retry_count >= max_retries:
From f18f31ebf6dda993ade9f9de222fcf7fdfe8952e Mon Sep 17 00:00:00 2001
From: xxxigm
Date: Thu, 18 Jun 2026 14:55:38 +0700
Subject: [PATCH 014/470] test(agent): cover non-retryable error HTML
summarization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Locks the contract that a non-retryable failure (a Cloudflare 403
"managed challenge" page) returns a short, HTML-free `error` field —
guarding the field path where the raw page was dumped to Discord as
~31 messages.
The test drives the standard chat-completions path with a concrete
model so the turn actually reaches `client.chat.completions.create`,
where the mocked 403 is raised. It asserts the create call happened
(guarding against a vacuous pass — an empty model on the Codex
Responses path would otherwise abort on a validation ValueError before
any API call) and that the summarized error includes "403" while
excluding / _cf_chl_opt. The non-retryable abort path is
provider-agnostic; a Cloudflare managed-challenge 403 can surface on
any provider behind Cloudflare.
---
.../test_nonretryable_error_html_summary.py | 130 ++++++++++++++++++
1 file changed, 130 insertions(+)
create mode 100644 tests/run_agent/test_nonretryable_error_html_summary.py
diff --git a/tests/run_agent/test_nonretryable_error_html_summary.py b/tests/run_agent/test_nonretryable_error_html_summary.py
new file mode 100644
index 00000000000..db765b124f3
--- /dev/null
+++ b/tests/run_agent/test_nonretryable_error_html_summary.py
@@ -0,0 +1,130 @@
+"""Regression: non-retryable API failures must not leak raw HTML pages.
+
+A scheduled cron job fell back to the Codex (``chatgpt.com``) provider, which
+returned a Cloudflare *challenge* page (HTTP 403) instead of a normal API
+response. The conversation loop classified this as a non-retryable client
+error and returned the failure dict — but the ``error`` field carried
+``str(api_error)``, i.e. the entire ~60 KB Cloudflare HTML page. The cron
+scheduler then delivered that verbatim to Discord, where it was split into
+~31 messages (the reporter's "31 part discord message which is cloudflares
+challenge page").
+
+The sibling "max retries exhausted" path already summarized the error via
+``_summarize_api_error`` (which collapses HTML pages to a one-liner); the
+non-retryable path did not. These tests lock the contract: whichever
+terminal path is taken, ``result['error']`` is a short, HTML-free summary.
+"""
+
+from unittest.mock import MagicMock, patch
+
+import run_agent
+from run_agent import AIAgent
+
+
+# A representative Cloudflare "managed challenge" body, matching the shape the
+# Codex backend returned in the field report (no <title>, large inline
+# ``_cf_chl_opt`` script). Padded so length-based assertions are meaningful.
+_CLOUDFLARE_CHALLENGE_HTML = (
+ "\n\n \n"
+ ' \n'
+ " \n
"
+ "
\n \n\n"
+)
+
+
+def _make_403_html_error() -> Exception:
+ """An exception mimicking a Codex 403 whose body is a Cloudflare page."""
+ err = Exception(_CLOUDFLARE_CHALLENGE_HTML)
+ err.status_code = 403
+ return err
+
+
+def _make_agent() -> AIAgent:
+ # Drive the standard chat-completions path with a concrete model so the
+ # turn actually reaches ``client.chat.completions.create`` — that is where
+ # the mocked 403 is raised. The non-retryable abort being exercised lives
+ # in the shared conversation loop and is provider-agnostic; a Cloudflare
+ # "managed challenge" 403 can surface on any provider sitting behind
+ # Cloudflare (it was first reported on the Codex backend). Pinning
+ # ``api_mode`` + ``model`` here avoids the earlier abort the previous
+ # revision hit: an empty model on the Codex Responses path raised a
+ # validation ``ValueError`` *before* any API call, so the test passed
+ # without ever touching the 403 summarization path.
+ with (
+ patch("run_agent.get_tool_definitions", return_value=[]),
+ patch("run_agent.check_toolset_requirements", return_value={}),
+ patch("run_agent.OpenAI"),
+ ):
+ a = AIAgent(
+ api_key="test-key-1234567890",
+ base_url="https://api.openai.com/v1",
+ provider="openai",
+ api_mode="chat_completions",
+ model="gpt-5.5",
+ quiet_mode=True,
+ skip_context_files=True,
+ skip_memory=True,
+ )
+ a.client = MagicMock()
+ a._cached_system_prompt = "You are helpful."
+ a._use_prompt_caching = False
+ a.tool_delay = 0
+ a.compression_enabled = False
+ a.save_trajectories = False
+ return a
+
+
+def test_summarize_collapses_cloudflare_challenge_page():
+ """``_summarize_api_error`` must never echo the raw HTML body."""
+ summary = AIAgent._summarize_api_error(_make_403_html_error())
+
+ assert "
Date: Thu, 18 Jun 2026 15:46:47 +0530
Subject: [PATCH 015/470] refactor(agent): reuse hoisted summary in
content-policy branch
The non-retryable abort path now computes _nonretryable_summary once and
reuses it at the emit sites and the returned error field. The
content-policy-blocked return branch still recomputed the identical
value into a separate _summary local, half-honoring the 'summarize once'
intent. _summarize_api_error is a pure staticmethod and api_error is
never reassigned in this block, so _summary was provably byte-identical
to _nonretryable_summary. Reuse the hoisted value and drop the redundant
call. Behavior-preserving.
---
agent/conversation_loop.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py
index 163a508a8cd..0ccc9649428 100644
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -3297,18 +3297,17 @@ def run_conversation(
else:
agent._persist_session(messages, conversation_history)
if classified.reason == FailoverReason.content_policy_blocked:
- _summary = agent._summarize_api_error(api_error)
_policy_response = (
"⚠️ The model provider's safety filter blocked this request "
"(not a Hermes/gateway failure).\n\n"
- f"Provider message: {_summary}\n\n"
+ f"Provider message: {_nonretryable_summary}\n\n"
f"{_CONTENT_POLICY_RECOVERY_HINT}"
)
return _content_policy_blocked_result(
messages,
api_call_count,
final_response=_policy_response,
- error_detail=_summary,
+ error_detail=_nonretryable_summary,
)
return {
"final_response": None,
From d573e7c9e1639d7c98c02f3face6f599464f8758 Mon Sep 17 00:00:00 2001
From: emozilla
Date: Thu, 18 Jun 2026 16:00:26 -0400
Subject: [PATCH 016/470] fix(dashboard): use DS Button prefix/size API instead
of inline icons
@nous-research/ui@0.18.2 Button is grid-based: size=xs is an
aspect-square icon-only box, and icons belong in prefix/suffix.
The dashboard used shadcn-style size=xs + inline text
children, which forced text buttons into broken tall squares
(Configure, Run setup, Select, Save keys) and split icon/label
across grid columns elsewhere (Schedule it, Prune/Delete actions).
Move leading icons to prefix and size text buttons as sm/default.
For the post-setup spinner, drive the spin from a button-level
[&_svg]:animate-spin selector since the prefix slot clones the
icon and overwrites its className.
- ToolsetConfigDrawer: Select, Save keys, Run setup
- SkillsPage: New skill, Configure
- AutomationBlueprints: Schedule it
- SessionsPage: Prune old sessions, Delete empty, Delete selected
---
web/src/components/AutomationBlueprints.tsx | 7 +++--
web/src/components/ToolsetConfigDrawer.tsx | 32 ++++++++++++---------
web/src/pages/SessionsPage.tsx | 7 ++---
web/src/pages/SkillsPage.tsx | 7 ++---
4 files changed, 30 insertions(+), 23 deletions(-)
diff --git a/web/src/components/AutomationBlueprints.tsx b/web/src/components/AutomationBlueprints.tsx
index 10d1270fa05..209c75e0682 100644
--- a/web/src/components/AutomationBlueprints.tsx
+++ b/web/src/components/AutomationBlueprints.tsx
@@ -149,8 +149,11 @@ function BlueprintCard({
))}
From 83c034bd5bc855955a825ff4acd1ed11edab6c3d Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Fri, 19 Jun 2026 12:18:15 +0530
Subject: [PATCH 038/470] fix(dashboard): accept Slack allow-all wildcard in
allowed-users validation
The new SLACK_ALLOWED_USERS validation rejected '*', but the Slack gateway
honors '*' as an allow-all wildcard (gateway/platforms/slack.py DM auth,
slash-confirm, and approval-button paths). Accept '*' as a valid list entry
in both the API validator and the dashboard form so a value the runtime
honors is no longer blocked at setup.
---
hermes_cli/web_server.py | 4 +++-
tests/hermes_cli/test_web_server.py | 13 +++++++++++++
web/src/pages/ChannelsPage.tsx | 2 +-
3 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index b890f68649e..316bc154fa4 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -2342,10 +2342,12 @@ def _validate_messaging_env_value(platform_id: str, key: str, value: str) -> Non
)
if key == "SLACK_ALLOWED_USERS":
user_ids = [part.strip() for part in value.split(",")]
+ # "*" is the gateway's allow-all wildcard (see gateway/platforms/slack.py),
+ # so accept it as a valid entry alongside Slack member IDs (U.../W...).
invalid = [
user_id
for user_id in user_ids
- if not user_id or not re.fullmatch(r"[UW][A-Z0-9]{2,}", user_id)
+ if user_id != "*" and (not user_id or not re.fullmatch(r"[UW][A-Z0-9]{2,}", user_id))
]
if invalid:
raise HTTPException(
diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py
index d44c789b3e3..d7a4dbcbbf9 100644
--- a/tests/hermes_cli/test_web_server.py
+++ b/tests/hermes_cli/test_web_server.py
@@ -1687,6 +1687,19 @@ class TestWebServerEndpoints:
assert resp.status_code == 400
assert "member IDs" in resp.json()["detail"]
+ def test_update_messaging_platform_accepts_slack_allowed_users_wildcard(self):
+ # "*" is the gateway's allow-all wildcard (gateway/platforms/slack.py),
+ # so the dashboard must accept it rather than rejecting it as malformed.
+ from hermes_cli.config import load_env
+
+ resp = self.client.put(
+ "/api/messaging/platforms/slack",
+ json={"env": {"SLACK_ALLOWED_USERS": "*"}},
+ )
+
+ assert resp.status_code == 200
+ assert load_env()["SLACK_ALLOWED_USERS"] == "*"
+
def test_messaging_platform_test_reports_missing_required_setup(self):
resp = self.client.put("/api/messaging/platforms/discord", json={"enabled": True})
assert resp.status_code == 200
diff --git a/web/src/pages/ChannelsPage.tsx b/web/src/pages/ChannelsPage.tsx
index 84791738a25..db56beb1925 100644
--- a/web/src/pages/ChannelsPage.tsx
+++ b/web/src/pages/ChannelsPage.tsx
@@ -76,7 +76,7 @@ function validateMessagingEnvField(field: MessagingPlatformEnvVar, value: string
if (parts.some((part) => !part)) {
return "Slack member IDs must be comma-separated without empty entries.";
}
- const invalid = parts.find((part) => !SLACK_MEMBER_ID_RE.test(part));
+ const invalid = parts.find((part) => part !== "*" && !SLACK_MEMBER_ID_RE.test(part));
if (invalid) {
return `${invalid} does not look like a Slack member ID. Use IDs like U01ABC2DEF3.`;
}
From 1ab6f34791e28559911185b308d8bd1b0be5f393 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Fri, 19 Jun 2026 12:22:30 +0530
Subject: [PATCH 039/470] refactor(dashboard): align Slack allowlist validation
with gateway parse
- Drop empty entries before validating SLACK_ALLOWED_USERS so a trailing or
interior comma (which the gateway silently tolerates in
gateway/platforms/slack.py) is no longer rejected at the dashboard.
- Hoist the member-ID regex to a module-level _SLACK_MEMBER_ID_RE constant
and note it stays in sync with the frontend SLACK_MEMBER_ID_RE.
- Add a regression test for the trailing-comma case.
---
hermes_cli/web_server.py | 14 ++++++++++----
tests/hermes_cli/test_web_server.py | 13 +++++++++++++
web/src/pages/ChannelsPage.tsx | 11 +++++++----
3 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 316bc154fa4..b0d51e2481e 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -2325,6 +2325,11 @@ def _gateway_display_command(profile: Optional[str], verb: str) -> str:
return " ".join(["hermes", *_gateway_subcommand(profile, verb)])
+# Slack member IDs (users U..., Enterprise Grid W...). Kept in sync with the
+# frontend SLACK_MEMBER_ID_RE in web/src/pages/ChannelsPage.tsx.
+_SLACK_MEMBER_ID_RE = re.compile(r"[UW][A-Z0-9]{2,}")
+
+
def _validate_messaging_env_value(platform_id: str, key: str, value: str) -> None:
"""Reject platform credentials that are clearly in the wrong field."""
if platform_id != "slack" or not value:
@@ -2341,13 +2346,14 @@ def _validate_messaging_env_value(platform_id: str, key: str, value: str) -> Non
detail="Slack App Token must start with xapp-. Paste the app-level token from Basic Information > App-Level Tokens.",
)
if key == "SLACK_ALLOWED_USERS":
- user_ids = [part.strip() for part in value.split(",")]
- # "*" is the gateway's allow-all wildcard (see gateway/platforms/slack.py),
- # so accept it as a valid entry alongside Slack member IDs (U.../W...).
+ # Mirror the gateway's parse (gateway/platforms/slack.py): split on comma,
+ # strip, and drop empty entries so a trailing/interior comma isn't rejected
+ # here when the runtime would accept it. "*" is the allow-all wildcard.
+ user_ids = [part.strip() for part in value.split(",") if part.strip()]
invalid = [
user_id
for user_id in user_ids
- if user_id != "*" and (not user_id or not re.fullmatch(r"[UW][A-Z0-9]{2,}", user_id))
+ if user_id != "*" and not _SLACK_MEMBER_ID_RE.fullmatch(user_id)
]
if invalid:
raise HTTPException(
diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py
index d7a4dbcbbf9..7416ec0b87a 100644
--- a/tests/hermes_cli/test_web_server.py
+++ b/tests/hermes_cli/test_web_server.py
@@ -1700,6 +1700,19 @@ class TestWebServerEndpoints:
assert resp.status_code == 200
assert load_env()["SLACK_ALLOWED_USERS"] == "*"
+ def test_update_messaging_platform_accepts_slack_allowed_users_trailing_comma(self):
+ # The gateway drops empty entries (gateway/platforms/slack.py), so a
+ # trailing/interior comma must not be rejected by the dashboard.
+ from hermes_cli.config import load_env
+
+ resp = self.client.put(
+ "/api/messaging/platforms/slack",
+ json={"env": {"SLACK_ALLOWED_USERS": "U01ABC2DEF3,,W04XYZ5LMN6,"}},
+ )
+
+ assert resp.status_code == 200
+ assert load_env()["SLACK_ALLOWED_USERS"] == "U01ABC2DEF3,,W04XYZ5LMN6,"
+
def test_messaging_platform_test_reports_missing_required_setup(self):
resp = self.client.put("/api/messaging/platforms/discord", json={"enabled": True})
assert resp.status_code == 200
diff --git a/web/src/pages/ChannelsPage.tsx b/web/src/pages/ChannelsPage.tsx
index db56beb1925..7658c0cd61a 100644
--- a/web/src/pages/ChannelsPage.tsx
+++ b/web/src/pages/ChannelsPage.tsx
@@ -72,10 +72,13 @@ function validateMessagingEnvField(field: MessagingPlatformEnvVar, value: string
}
if (field.key === "SLACK_ALLOWED_USERS") {
- const parts = trimmed.split(",").map((part) => part.trim());
- if (parts.some((part) => !part)) {
- return "Slack member IDs must be comma-separated without empty entries.";
- }
+ // Mirror the gateway's parse (gateway/platforms/slack.py): drop empty
+ // entries so a trailing/interior comma isn't rejected here. "*" is the
+ // allow-all wildcard the gateway honors.
+ const parts = trimmed
+ .split(",")
+ .map((part) => part.trim())
+ .filter(Boolean);
const invalid = parts.find((part) => part !== "*" && !SLACK_MEMBER_ID_RE.test(part));
if (invalid) {
return `${invalid} does not look like a Slack member ID. Use IDs like U01ABC2DEF3.`;
From c7b7f92ec14a5c43deef844804f0bf6a7f2d992d Mon Sep 17 00:00:00 2001
From: Eurekaxun
Date: Tue, 2 Jun 2026 14:33:12 +0800
Subject: [PATCH 040/470] fix(openviking): sync structured turns with tool
parts
---
plugins/memory/openviking/__init__.py | 339 +++++++++++++++++-
tests/openviking_plugin/test_openviking.py | 274 ++++++++++++++
.../memory/test_openviking_provider.py | 47 ++-
3 files changed, 639 insertions(+), 21 deletions(-)
diff --git a/plugins/memory/openviking/__init__.py b/plugins/memory/openviking/__init__.py
index 7ebe6869a46..c7b05a4864c 100644
--- a/plugins/memory/openviking/__init__.py
+++ b/plugins/memory/openviking/__init__.py
@@ -70,6 +70,8 @@ _TIMEOUT = 30.0
_SESSION_DRAIN_TIMEOUT = 10.0
_DEFERRED_COMMIT_TIMEOUT = (_TIMEOUT * 2) + 5.0
_REMOTE_RESOURCE_PREFIXES = ("http://", "https://", "git@", "ssh://", "git://")
+_SYNC_TRACE_ENV = "HERMES_OPENVIKING_SYNC_TRACE"
+_OPENVIKING_RECALL_TOOL_NAMES = {"viking_search", "viking_read", "viking_browse"}
# Maps the viking_remember `category` enum to a viking:// subdirectory.
# Keep in sync with REMEMBER_SCHEMA.parameters.properties.category.enum.
@@ -156,6 +158,18 @@ def _derive_openviking_user_text(content: Any) -> str:
return extract_user_instruction_from_skill_message(content) or ""
+def _sync_trace_enabled() -> bool:
+ return os.environ.get(_SYNC_TRACE_ENV, "").strip().lower() in {"1", "true", "yes", "on"}
+
+
+def _preview(value: Any, limit: int = 160) -> str:
+ text = "" if value is None else str(value)
+ text = text.replace("\n", "\\n")
+ if len(text) > limit:
+ return text[:limit] + "..."
+ return text
+
+
# ---------------------------------------------------------------------------
# Process-level atexit safety net — ensures pending sessions are committed
# even if shutdown_memory_provider is never called (e.g. gateway crash,
@@ -2221,7 +2235,10 @@ class OpenVikingMemoryProvider(MemoryProvider):
def _commit_session(self, sid: str, turn_count: int, *, context: str) -> bool:
try:
- self._client.post(f"/api/v1/sessions/{sid}/commit")
+ self._client.post(
+ f"/api/v1/sessions/{sid}/commit",
+ {"keep_recent_count": 0},
+ )
self._mark_session_committed(sid)
logger.info("OpenViking session %s committed %s (%d turns)", sid, context, turn_count)
return True
@@ -2293,7 +2310,261 @@ class OpenVikingMemoryProvider(MemoryProvider):
with self._prefetch_lock:
self._prefetch_result = ""
- def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+ @staticmethod
+ def _message_text(content: Any) -> str:
+ """Extract text from OpenAI-style string/list content."""
+ if isinstance(content, str):
+ return content
+ if isinstance(content, list):
+ chunks = []
+ for block in content:
+ if isinstance(block, str):
+ chunks.append(block)
+ elif isinstance(block, dict):
+ if block.get("type") == "text" and isinstance(block.get("text"), str):
+ chunks.append(block["text"])
+ elif isinstance(block.get("content"), str):
+ chunks.append(block["content"])
+ return "\n".join(chunk for chunk in chunks if chunk)
+ if content is None:
+ return ""
+ return str(content)
+
+ @classmethod
+ def _message_matches_text(cls, message: Dict[str, Any], expected: Any) -> bool:
+ expected_text = cls._message_text(expected).strip()
+ if not expected_text:
+ return False
+ actual_text = cls._message_text(message.get("content")).strip()
+ return actual_text == expected_text
+
+ @classmethod
+ def _extract_current_turn_messages(
+ cls,
+ messages: Optional[List[Dict[str, Any]]],
+ user_content: str,
+ assistant_content: str,
+ ) -> List[Dict[str, Any]]:
+ """Slice the completed turn out of Hermes' full canonical transcript."""
+ if not messages:
+ return []
+
+ end_idx: Optional[int] = None
+ if cls._message_text(assistant_content).strip():
+ for idx in range(len(messages) - 1, -1, -1):
+ message = messages[idx]
+ if (
+ isinstance(message, dict)
+ and message.get("role") == "assistant"
+ and cls._message_matches_text(message, assistant_content)
+ ):
+ end_idx = idx
+ break
+ if end_idx is None:
+ for idx in range(len(messages) - 1, -1, -1):
+ message = messages[idx]
+ if isinstance(message, dict) and message.get("role") == "assistant":
+ end_idx = idx
+ break
+ if end_idx is None:
+ end_idx = len(messages) - 1
+
+ start_idx: Optional[int] = None
+ if cls._message_text(user_content).strip():
+ for idx in range(end_idx, -1, -1):
+ message = messages[idx]
+ if (
+ isinstance(message, dict)
+ and message.get("role") == "user"
+ and cls._message_matches_text(message, user_content)
+ ):
+ start_idx = idx
+ break
+ if start_idx is None:
+ for idx in range(end_idx, -1, -1):
+ message = messages[idx]
+ if isinstance(message, dict) and message.get("role") == "user":
+ start_idx = idx
+ break
+ if start_idx is None:
+ return []
+
+ return [message for message in messages[start_idx : end_idx + 1] if isinstance(message, dict)]
+
+ @staticmethod
+ def _tool_call_id(tool_call: Dict[str, Any]) -> str:
+ return str(tool_call.get("id") or tool_call.get("tool_call_id") or "")
+
+ @staticmethod
+ def _tool_call_name(tool_call: Dict[str, Any]) -> str:
+ function = tool_call.get("function")
+ if isinstance(function, dict):
+ return str(function.get("name") or "")
+ return str(tool_call.get("name") or "")
+
+ @staticmethod
+ def _is_openviking_recall_tool_name(tool_name: Any) -> bool:
+ return str(tool_name or "").strip().lower() in _OPENVIKING_RECALL_TOOL_NAMES
+
+ @staticmethod
+ def _tool_call_input(tool_call: Dict[str, Any]) -> Dict[str, Any]:
+ function = tool_call.get("function")
+ raw_args: Any = None
+ if isinstance(function, dict):
+ raw_args = function.get("arguments")
+ if raw_args is None:
+ raw_args = tool_call.get("args")
+ if raw_args is None:
+ return {}
+ if isinstance(raw_args, dict):
+ return raw_args
+ if isinstance(raw_args, str):
+ if not raw_args.strip():
+ return {}
+ try:
+ parsed = json.loads(raw_args)
+ except Exception:
+ return {"value": raw_args}
+ if isinstance(parsed, dict):
+ return parsed
+ return {"value": parsed}
+ return {"value": raw_args}
+
+ @classmethod
+ def _tool_result_status(cls, message: Dict[str, Any]) -> str:
+ raw_status = str(message.get("status") or message.get("tool_status") or "").lower()
+ if raw_status in {"error", "failed", "failure"}:
+ return "error"
+ if raw_status in {"completed", "complete", "success", "succeeded"}:
+ return "completed"
+
+ text = cls._message_text(message.get("content")).strip()
+ if text:
+ try:
+ parsed = json.loads(text)
+ except Exception:
+ parsed = None
+ if isinstance(parsed, dict):
+ status = str(parsed.get("status") or "").lower()
+ exit_code = parsed.get("exit_code")
+ if (
+ status in {"error", "failed", "failure"}
+ or parsed.get("success") is False
+ or bool(parsed.get("error"))
+ or (isinstance(exit_code, int) and exit_code != 0)
+ ):
+ return "error"
+ return "completed"
+
+ @classmethod
+ def _messages_to_openviking_batch(
+ cls,
+ messages: List[Dict[str, Any]],
+ ) -> List[Dict[str, Any]]:
+ """Convert Hermes canonical messages into OpenViking batch payloads."""
+ tool_calls_by_id: Dict[str, Dict[str, Any]] = {}
+ completed_tool_ids: set[str] = set()
+ skipped_tool_ids: set[str] = set()
+ for message in messages:
+ if not isinstance(message, dict):
+ continue
+ if message.get("role") == "tool":
+ tool_id = str(message.get("tool_call_id") or message.get("id") or "")
+ if tool_id:
+ completed_tool_ids.add(tool_id)
+ if cls._is_openviking_recall_tool_name(message.get("name")):
+ skipped_tool_ids.add(tool_id)
+ continue
+ if message.get("role") != "assistant":
+ continue
+ for tool_call in message.get("tool_calls") or []:
+ if not isinstance(tool_call, dict):
+ continue
+ tool_id = cls._tool_call_id(tool_call)
+ tool_name = cls._tool_call_name(tool_call)
+ if tool_id:
+ tool_calls_by_id[tool_id] = {
+ "tool_name": tool_name,
+ "tool_input": cls._tool_call_input(tool_call),
+ }
+ if cls._is_openviking_recall_tool_name(tool_name):
+ skipped_tool_ids.add(tool_id)
+
+ payload_messages: List[Dict[str, Any]] = []
+ pending_tool_parts: List[Dict[str, Any]] = []
+
+ def flush_tool_parts() -> None:
+ nonlocal pending_tool_parts
+ if pending_tool_parts:
+ payload_messages.append({"role": "user", "parts": pending_tool_parts})
+ pending_tool_parts = []
+
+ for message in messages:
+ if not isinstance(message, dict):
+ continue
+
+ role = str(message.get("role") or "")
+ if role in {"system", "developer"}:
+ continue
+
+ if role == "tool":
+ tool_id = str(message.get("tool_call_id") or message.get("id") or "")
+ prior_call = tool_calls_by_id.get(tool_id, {})
+ tool_name = str(message.get("name") or prior_call.get("tool_name") or "")
+ if tool_id in skipped_tool_ids or cls._is_openviking_recall_tool_name(tool_name):
+ continue
+ tool_part = {
+ "type": "tool",
+ "tool_id": tool_id,
+ "tool_name": tool_name,
+ "tool_input": prior_call.get("tool_input", {}),
+ "tool_output": cls._message_text(message.get("content")),
+ "tool_status": cls._tool_result_status(message),
+ }
+ pending_tool_parts.append(tool_part)
+ continue
+
+ if role not in {"user", "assistant"}:
+ continue
+
+ flush_tool_parts()
+ parts: List[Dict[str, Any]] = []
+ text = cls._message_text(message.get("content"))
+ if text:
+ parts.append({"type": "text", "text": text})
+
+ if role == "assistant":
+ for tool_call in message.get("tool_calls") or []:
+ if not isinstance(tool_call, dict):
+ continue
+ tool_id = cls._tool_call_id(tool_call)
+ tool_name = cls._tool_call_name(tool_call)
+ if tool_id in skipped_tool_ids or cls._is_openviking_recall_tool_name(tool_name):
+ continue
+ if tool_id in completed_tool_ids:
+ continue
+ parts.append({
+ "type": "tool",
+ "tool_id": tool_id,
+ "tool_name": tool_name,
+ "tool_input": cls._tool_call_input(tool_call),
+ "tool_status": "pending",
+ })
+
+ if parts:
+ payload_messages.append({"role": role, "parts": parts})
+
+ flush_tool_parts()
+ return payload_messages
+
+ def sync_turn(
+ self,
+ user_content: str,
+ assistant_content: str,
+ *,
+ session_id: str = "",
+ messages: Optional[List[Dict[str, Any]]] = None,
+ ) -> None:
"""Record the conversation turn in OpenViking's session (non-blocking)."""
if not self._client:
return
@@ -2302,6 +2573,37 @@ class OpenVikingMemoryProvider(MemoryProvider):
if not user_content:
return
+ turn_messages = (
+ self._extract_current_turn_messages(messages, user_content, assistant_content)
+ if messages is not None
+ else []
+ )
+ if turn_messages:
+ turn_messages = [dict(message) for message in turn_messages]
+ for message in turn_messages:
+ if message.get("role") == "user":
+ message["content"] = user_content
+ break
+ batch_messages = self._messages_to_openviking_batch(turn_messages)
+
+ if _sync_trace_enabled():
+ logger.info(
+ "OpenViking sync_turn trace: session_arg=%r cached_session=%r "
+ "messages_param_supported=true messages_present=%s message_count=%s "
+ "turn_message_count=%d batch_message_count=%d user_len=%d assistant_len=%d "
+ "user_preview=%r assistant_preview=%r",
+ session_id,
+ self._session_id,
+ messages is not None,
+ len(messages) if messages is not None else None,
+ len(turn_messages),
+ len(batch_messages),
+ len(str(user_content or "")),
+ len(str(assistant_content or "")),
+ _preview(user_content),
+ _preview(assistant_content),
+ )
+
# Snapshot the sid and bump the turn counter atomically so a
# concurrent on_session_switch/on_session_end can't interleave its
# snapshot+reset between the read and the increment (lost turn) and so
@@ -2313,24 +2615,39 @@ class OpenVikingMemoryProvider(MemoryProvider):
self._turn_count += 1
def _sync():
- try:
- client = self._new_client()
+ def _post_turn(client: _VikingClient) -> None:
+ if batch_messages:
+ payload = {"messages": batch_messages}
+ if _sync_trace_enabled():
+ logger.info(
+ "OpenViking sync_turn trace: POST /api/v1/sessions/%s/messages/batch payload=%s",
+ sid,
+ json.dumps(payload, ensure_ascii=False),
+ )
+ try:
+ client.post(f"/api/v1/sessions/{sid}/messages/batch", payload)
+ return
+ except Exception as batch_error:
+ logger.warning(
+ "OpenViking structured sync failed; falling back to text sync: %s",
+ batch_error,
+ )
+
self._post_session_turn(
client,
sid,
user_content[:4000],
- assistant_content[:4000],
+ self._message_text(assistant_content)[:4000],
)
+
+ try:
+ client = self._new_client()
+ _post_turn(client)
except Exception as e:
logger.debug("OpenViking sync_turn failed, reconnecting: %s", e)
try:
client = self._new_client()
- self._post_session_turn(
- client,
- sid,
- user_content[:4000],
- assistant_content[:4000],
- )
+ _post_turn(client)
except Exception as retry_error:
logger.warning("OpenViking sync_turn failed: %s", retry_error)
diff --git a/tests/openviking_plugin/test_openviking.py b/tests/openviking_plugin/test_openviking.py
index f10fc502000..ee5d1eb2373 100644
--- a/tests/openviking_plugin/test_openviking.py
+++ b/tests/openviking_plugin/test_openviking.py
@@ -265,6 +265,280 @@ class TestOpenVikingSkillQuerySafety:
assert RecordingVikingClient.calls == []
+class TestOpenVikingTurnConversion:
+ def test_extract_current_turn_anchors_on_latest_matching_user_and_assistant(self):
+ messages = [
+ {"role": "user", "content": "Please inspect the repository for assemble hooks."},
+ {"role": "assistant", "content": "Earlier answer."},
+ {"role": "user", "content": "Please inspect the repository for assemble hooks."},
+ {
+ "role": "assistant",
+ "content": "I will search the codebase.",
+ "tool_calls": [
+ {
+ "id": "call_rg_1",
+ "type": "function",
+ "function": {
+ "name": "shell_command",
+ "arguments": json.dumps({"command": "rg assemble"}),
+ },
+ }
+ ],
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "call_rg_1",
+ "name": "shell_command",
+ "content": "agent/context_engine.py: no preassemble hook",
+ },
+ {"role": "assistant", "content": "The current main does not expose assemble."},
+ ]
+
+ turn = OpenVikingMemoryProvider._extract_current_turn_messages(
+ messages,
+ "Please inspect the repository for assemble hooks.",
+ "The current main does not expose assemble.",
+ )
+
+ assert turn == messages[2:]
+
+ def test_messages_to_openviking_batch_coalesces_tool_results(self):
+ turn = [
+ {"role": "user", "content": "Please inspect the repository for assemble hooks."},
+ {
+ "role": "assistant",
+ "content": "I will search the codebase.",
+ "tool_calls": [
+ {
+ "id": "call_rg_1",
+ "type": "function",
+ "function": {
+ "name": "shell_command",
+ "arguments": json.dumps({"command": "rg assemble"}),
+ },
+ }
+ ],
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "call_rg_1",
+ "name": "shell_command",
+ "content": "agent/context_engine.py: no preassemble hook",
+ },
+ {"role": "assistant", "content": "The current main does not expose assemble."},
+ ]
+
+ batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+ assert [message["role"] for message in batch] == ["user", "assistant", "user", "assistant"]
+ assert batch[0]["parts"] == [
+ {"type": "text", "text": "Please inspect the repository for assemble hooks."}
+ ]
+ assert batch[1]["parts"] == [
+ {"type": "text", "text": "I will search the codebase."}
+ ]
+ assert batch[2]["parts"] == [
+ {
+ "type": "tool",
+ "tool_id": "call_rg_1",
+ "tool_name": "shell_command",
+ "tool_input": {"command": "rg assemble"},
+ "tool_output": "agent/context_engine.py: no preassemble hook",
+ "tool_status": "completed",
+ }
+ ]
+ assert batch[3]["parts"] == [
+ {"type": "text", "text": "The current main does not expose assemble."}
+ ]
+
+ def test_messages_to_openviking_batch_marks_json_tool_error_results(self):
+ turn = [
+ {"role": "user", "content": "Check the file."},
+ {
+ "role": "assistant",
+ "content": "",
+ "tool_calls": [
+ {
+ "id": "call_read_1",
+ "type": "function",
+ "function": {
+ "name": "read_file",
+ "arguments": json.dumps({"path": "missing.md"}),
+ },
+ }
+ ],
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "call_read_1",
+ "name": "read_file",
+ "content": json.dumps({"error": "File not found", "exit_code": 1}),
+ },
+ ]
+
+ batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+ assert batch[1]["parts"] == [
+ {
+ "type": "tool",
+ "tool_id": "call_read_1",
+ "tool_name": "read_file",
+ "tool_input": {"path": "missing.md"},
+ "tool_output": json.dumps({"error": "File not found", "exit_code": 1}),
+ "tool_status": "error",
+ }
+ ]
+
+ def test_messages_to_openviking_batch_keeps_pending_tool_call_without_result(self):
+ turn = [
+ {"role": "user", "content": "Start a long running check."},
+ {
+ "role": "assistant",
+ "content": "Starting it now.",
+ "tool_calls": [
+ {
+ "id": "call_long_1",
+ "type": "function",
+ "function": {
+ "name": "long_check",
+ "arguments": json.dumps({"target": "repo"}),
+ },
+ }
+ ],
+ },
+ ]
+
+ batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+ assert batch[1]["parts"] == [
+ {"type": "text", "text": "Starting it now."},
+ {
+ "type": "tool",
+ "tool_id": "call_long_1",
+ "tool_name": "long_check",
+ "tool_input": {"target": "repo"},
+ "tool_status": "pending",
+ },
+ ]
+
+ def test_messages_to_openviking_batch_coalesces_adjacent_tool_results(self):
+ turn = [
+ {"role": "user", "content": "Run both tools."},
+ {
+ "role": "assistant",
+ "content": "",
+ "tool_calls": [
+ {
+ "id": "call_a",
+ "type": "function",
+ "function": {
+ "name": "first_tool",
+ "arguments": json.dumps({"x": 1}),
+ },
+ },
+ {
+ "id": "call_b",
+ "type": "function",
+ "function": {
+ "name": "second_tool",
+ "arguments": json.dumps({"y": 2}),
+ },
+ },
+ ],
+ },
+ {"role": "tool", "tool_call_id": "call_a", "name": "first_tool", "content": "a"},
+ {"role": "tool", "tool_call_id": "call_b", "name": "second_tool", "content": "b"},
+ {"role": "assistant", "content": "Done."},
+ ]
+
+ batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+ assert [message["role"] for message in batch] == ["user", "user", "assistant"]
+ assert batch[1]["parts"] == [
+ {
+ "type": "tool",
+ "tool_id": "call_a",
+ "tool_name": "first_tool",
+ "tool_input": {"x": 1},
+ "tool_output": "a",
+ "tool_status": "completed",
+ },
+ {
+ "type": "tool",
+ "tool_id": "call_b",
+ "tool_name": "second_tool",
+ "tool_input": {"y": 2},
+ "tool_output": "b",
+ "tool_status": "completed",
+ },
+ ]
+
+ def test_messages_to_openviking_batch_skips_openviking_recall_tool_results(self):
+ for recall_tool_name in ("viking_search", "viking_read", "viking_browse"):
+ turn = [
+ {"role": "user", "content": "What did we decide about context assembly?"},
+ {
+ "role": "assistant",
+ "content": "",
+ "tool_calls": [
+ {
+ "id": "call_recall_1",
+ "type": "function",
+ "function": {
+ "name": recall_tool_name,
+ "arguments": json.dumps({"query": "context assembly decision"}),
+ },
+ },
+ {
+ "id": "call_shell_1",
+ "type": "function",
+ "function": {
+ "name": "shell_command",
+ "arguments": json.dumps({"command": "rg preassemble"}),
+ },
+ },
+ ],
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "call_recall_1",
+ "name": recall_tool_name,
+ "content": json.dumps({
+ "results": [
+ {
+ "uri": "viking://user/hermes/memories/context",
+ "abstract": "Old OpenViking memory content",
+ }
+ ]
+ }),
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "call_shell_1",
+ "name": "shell_command",
+ "content": "plugins/memory/openviking/__init__.py",
+ },
+ {"role": "assistant", "content": "We decided to keep sync_turn scoped to ingestion."},
+ ]
+
+ batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+ assert [message["role"] for message in batch] == ["user", "user", "assistant"]
+ assert batch[1]["parts"] == [
+ {
+ "type": "tool",
+ "tool_id": "call_shell_1",
+ "tool_name": "shell_command",
+ "tool_input": {"command": "rg preassemble"},
+ "tool_output": "plugins/memory/openviking/__init__.py",
+ "tool_status": "completed",
+ }
+ ]
+ batch_text = json.dumps(batch)
+ assert recall_tool_name not in batch_text
+ assert "Old OpenViking memory content" not in batch_text
+
+
class TestOpenVikingRead:
def test_overview_read_normalizes_uri_and_unwraps_result(self):
provider = OpenVikingMemoryProvider()
diff --git a/tests/plugins/memory/test_openviking_provider.py b/tests/plugins/memory/test_openviking_provider.py
index 954385fa54e..2863566b367 100644
--- a/tests/plugins/memory/test_openviking_provider.py
+++ b/tests/plugins/memory/test_openviking_provider.py
@@ -1975,7 +1975,10 @@ def test_on_session_switch_commits_old_session_and_rotates_id():
provider.on_session_switch("new-sid", parent_session_id="old-sid")
- provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+ provider._client.post.assert_called_once_with(
+ "/api/v1/sessions/old-sid/commit",
+ {"keep_recent_count": 0},
+ )
assert provider._session_id == "new-sid"
assert provider._turn_count == 0
@@ -1998,7 +2001,10 @@ def test_on_session_switch_commits_pending_tokens_without_turn_count():
provider.on_session_switch("new-sid")
provider._client.get.assert_called_once_with("/api/v1/sessions/old-sid")
- provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+ provider._client.post.assert_called_once_with(
+ "/api/v1/sessions/old-sid/commit",
+ {"keep_recent_count": 0},
+ )
assert provider._session_id == "new-sid"
assert provider._turn_count == 0
@@ -2051,7 +2057,10 @@ def test_on_session_switch_waits_for_inflight_sync_thread():
provider.on_session_switch("new-sid")
assert join_calls, "expected on_session_switch to join the in-flight sync thread"
- provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+ provider._client.post.assert_called_once_with(
+ "/api/v1/sessions/old-sid/commit",
+ {"keep_recent_count": 0},
+ )
def test_on_session_switch_noop_on_empty_new_id():
@@ -2206,7 +2215,10 @@ def test_on_session_end_marks_session_clean_after_successful_commit():
provider.on_session_end([])
- provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+ provider._client.post.assert_called_once_with(
+ "/api/v1/sessions/old-sid/commit",
+ {"keep_recent_count": 0},
+ )
assert provider._turn_count == 0
@@ -2228,7 +2240,10 @@ def test_on_session_end_commits_pending_tokens_without_turn_count():
provider.on_session_end([])
provider._client.get.assert_called_once_with("/api/v1/sessions/old-sid")
- provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+ provider._client.post.assert_called_once_with(
+ "/api/v1/sessions/old-sid/commit",
+ {"keep_recent_count": 0},
+ )
def test_end_then_switch_does_not_double_commit():
@@ -2241,7 +2256,10 @@ def test_end_then_switch_does_not_double_commit():
provider.on_session_switch("new-sid", parent_session_id="old-sid")
# Exactly one commit call, on the OLD session, fired by on_session_end.
- provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+ provider._client.post.assert_called_once_with(
+ "/api/v1/sessions/old-sid/commit",
+ {"keep_recent_count": 0},
+ )
assert provider._session_id == "new-sid"
assert provider._turn_count == 0
@@ -2253,7 +2271,10 @@ def test_end_then_switch_with_pending_tokens_does_not_double_commit():
provider.on_session_end([])
provider.on_session_switch("new-sid", parent_session_id="old-sid")
- provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+ provider._client.post.assert_called_once_with(
+ "/api/v1/sessions/old-sid/commit",
+ {"keep_recent_count": 0},
+ )
assert provider._session_id == "new-sid"
assert provider._turn_count == 0
@@ -2400,7 +2421,10 @@ def test_on_session_switch_does_not_block_caller_on_slow_drain():
# Let the finalizer finish so it doesn't leak past the test.
release_drain.set()
assert provider._drain_finalizers(timeout=5.0)
- provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+ provider._client.post.assert_called_once_with(
+ "/api/v1/sessions/old-sid/commit",
+ {"keep_recent_count": 0},
+ )
def test_on_session_switch_defers_old_commit_to_finalizer_thread():
@@ -2415,7 +2439,7 @@ def test_on_session_switch_defers_old_commit_to_finalizer_thread():
committed = threading.Event()
drain_timeouts = []
- def fake_post(path):
+ def fake_post(path, payload=None):
committed.set()
return {}
@@ -2433,7 +2457,10 @@ def test_on_session_switch_defers_old_commit_to_finalizer_thread():
assert provider._turn_count == 0
# The old-session commit lands on the finalizer thread, not inline.
assert committed.wait(timeout=5.0), "old session was not finalized off-thread"
- provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+ provider._client.post.assert_called_once_with(
+ "/api/v1/sessions/old-sid/commit",
+ {"keep_recent_count": 0},
+ )
# The finalizer drains with the deferred (longer) budget, not inline 10s.
assert drain_timeouts == [_DEFERRED_COMMIT_TIMEOUT]
From d7cd0bc0863cda1a203f00422b1441ca2d9890ed Mon Sep 17 00:00:00 2001
From: Hao Zhe
Date: Fri, 19 Jun 2026 13:42:36 +0800
Subject: [PATCH 041/470] fix(openviking): preserve structured sync attribution
---
agent/codex_runtime.py | 1 +
agent/message_content.py | 50 +++++++++++++
plugins/memory/openviking/__init__.py | 36 +++++-----
tests/agent/test_message_content.py | 25 +++++++
tests/openviking_plugin/test_openviking.py | 36 +++++++++-
.../memory/test_openviking_provider.py | 72 +++++++++++++++++++
.../test_codex_app_server_integration.py | 13 +++-
7 files changed, 210 insertions(+), 23 deletions(-)
create mode 100644 agent/message_content.py
create mode 100644 tests/agent/test_message_content.py
diff --git a/agent/codex_runtime.py b/agent/codex_runtime.py
index 7f175fff97f..4ff67871934 100644
--- a/agent/codex_runtime.py
+++ b/agent/codex_runtime.py
@@ -290,6 +290,7 @@ def run_codex_app_server_turn(
original_user_message=original_user_message,
final_response=turn.final_text,
interrupted=False,
+ messages=messages,
)
except Exception:
logger.debug("external memory sync raised", exc_info=True)
diff --git a/agent/message_content.py b/agent/message_content.py
new file mode 100644
index 00000000000..c42bf408550
--- /dev/null
+++ b/agent/message_content.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+
+
+_NON_TEXT_PART_TYPES = {"image", "image_url", "input_image", "audio", "input_audio"}
+_TEXT_KEYS = ("text", "content", "input_text", "output_text", "summary_text")
+
+
+def _field(value: Any, key: str) -> Any:
+ if isinstance(value, Mapping):
+ return value.get(key)
+ return getattr(value, key, None)
+
+
+def _text_from_part(part: Any) -> str:
+ if part is None:
+ return ""
+ if isinstance(part, str):
+ return part
+
+ part_type = str(_field(part, "type") or "").strip().lower()
+ if part_type in _NON_TEXT_PART_TYPES:
+ return ""
+
+ for key in _TEXT_KEYS:
+ text = _field(part, key)
+ if isinstance(text, str):
+ return text
+ return ""
+
+
+def flatten_message_text(content: Any, *, sep: str = "\n") -> str:
+ """Return the visible text from common chat/Responses message content shapes."""
+ if content is None:
+ return ""
+ if isinstance(content, str):
+ return content
+ if isinstance(content, list):
+ chunks = [_text_from_part(part) for part in content]
+ return sep.join(chunk for chunk in chunks if chunk)
+
+ text = _text_from_part(content)
+ if text:
+ return text
+ try:
+ return str(content)
+ except Exception:
+ return ""
diff --git a/plugins/memory/openviking/__init__.py b/plugins/memory/openviking/__init__.py
index c7b05a4864c..82f1f26a0a0 100644
--- a/plugins/memory/openviking/__init__.py
+++ b/plugins/memory/openviking/__init__.py
@@ -45,6 +45,7 @@ from typing import Any, Callable, Dict, List, Optional, Set
from urllib.parse import urlparse
from urllib.request import url2pathname
+from agent.message_content import flatten_message_text
from agent.memory_provider import MemoryProvider
from agent.skill_commands import extract_user_instruction_from_skill_message
from tools.registry import tool_error
@@ -2313,22 +2314,7 @@ class OpenVikingMemoryProvider(MemoryProvider):
@staticmethod
def _message_text(content: Any) -> str:
"""Extract text from OpenAI-style string/list content."""
- if isinstance(content, str):
- return content
- if isinstance(content, list):
- chunks = []
- for block in content:
- if isinstance(block, str):
- chunks.append(block)
- elif isinstance(block, dict):
- if block.get("type") == "text" and isinstance(block.get("text"), str):
- chunks.append(block["text"])
- elif isinstance(block.get("content"), str):
- chunks.append(block["content"])
- return "\n".join(chunk for chunk in chunks if chunk)
- if content is None:
- return ""
- return str(content)
+ return flatten_message_text(content)
@classmethod
def _message_matches_text(cls, message: Dict[str, Any], expected: Any) -> bool:
@@ -2460,8 +2446,11 @@ class OpenVikingMemoryProvider(MemoryProvider):
def _messages_to_openviking_batch(
cls,
messages: List[Dict[str, Any]],
+ *,
+ assistant_peer_id: str = "",
) -> List[Dict[str, Any]]:
"""Convert Hermes canonical messages into OpenViking batch payloads."""
+ assistant_peer_id = str(assistant_peer_id or "").strip()
tool_calls_by_id: Dict[str, Dict[str, Any]] = {}
completed_tool_ids: set[str] = set()
skipped_tool_ids: set[str] = set()
@@ -2493,10 +2482,16 @@ class OpenVikingMemoryProvider(MemoryProvider):
payload_messages: List[Dict[str, Any]] = []
pending_tool_parts: List[Dict[str, Any]] = []
+ def payload_message(role: str, parts: List[Dict[str, Any]]) -> Dict[str, Any]:
+ payload: Dict[str, Any] = {"role": role, "parts": parts}
+ if role == "assistant" and assistant_peer_id:
+ payload["peer_id"] = assistant_peer_id
+ return payload
+
def flush_tool_parts() -> None:
nonlocal pending_tool_parts
if pending_tool_parts:
- payload_messages.append({"role": "user", "parts": pending_tool_parts})
+ payload_messages.append(payload_message("assistant", pending_tool_parts))
pending_tool_parts = []
for message in messages:
@@ -2552,7 +2547,7 @@ class OpenVikingMemoryProvider(MemoryProvider):
})
if parts:
- payload_messages.append({"role": role, "parts": parts})
+ payload_messages.append(payload_message(role, parts))
flush_tool_parts()
return payload_messages
@@ -2584,7 +2579,10 @@ class OpenVikingMemoryProvider(MemoryProvider):
if message.get("role") == "user":
message["content"] = user_content
break
- batch_messages = self._messages_to_openviking_batch(turn_messages)
+ batch_messages = self._messages_to_openviking_batch(
+ turn_messages,
+ assistant_peer_id=getattr(self, "_agent", _DEFAULT_AGENT),
+ )
if _sync_trace_enabled():
logger.info(
diff --git a/tests/agent/test_message_content.py b/tests/agent/test_message_content.py
new file mode 100644
index 00000000000..0207d63600b
--- /dev/null
+++ b/tests/agent/test_message_content.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+from agent.message_content import flatten_message_text
+
+
+def test_flatten_message_text_accepts_chat_and_responses_text_parts():
+ content = [
+ {"type": "text", "text": "chat text"},
+ {"type": "input_text", "text": "user text"},
+ {"type": "output_text", "text": "assistant text"},
+ {"type": "summary_text", "text": "summary text"},
+ ]
+
+ assert flatten_message_text(content) == "chat text\nuser text\nassistant text\nsummary text"
+
+
+def test_flatten_message_text_accepts_object_parts():
+ content = [
+ SimpleNamespace(type="output_text", text="object text"),
+ {"content": "legacy content"},
+ ]
+
+ assert flatten_message_text(content) == "object text\nlegacy content"
diff --git a/tests/openviking_plugin/test_openviking.py b/tests/openviking_plugin/test_openviking.py
index ee5d1eb2373..3a743287672 100644
--- a/tests/openviking_plugin/test_openviking.py
+++ b/tests/openviking_plugin/test_openviking.py
@@ -330,7 +330,7 @@ class TestOpenVikingTurnConversion:
batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
- assert [message["role"] for message in batch] == ["user", "assistant", "user", "assistant"]
+ assert [message["role"] for message in batch] == ["user", "assistant", "assistant", "assistant"]
assert batch[0]["parts"] == [
{"type": "text", "text": "Please inspect the repository for assemble hooks."}
]
@@ -378,6 +378,7 @@ class TestOpenVikingTurnConversion:
batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+ assert batch[1]["role"] == "assistant"
assert batch[1]["parts"] == [
{
"type": "tool",
@@ -453,7 +454,7 @@ class TestOpenVikingTurnConversion:
batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
- assert [message["role"] for message in batch] == ["user", "user", "assistant"]
+ assert [message["role"] for message in batch] == ["user", "assistant", "assistant"]
assert batch[1]["parts"] == [
{
"type": "tool",
@@ -523,7 +524,7 @@ class TestOpenVikingTurnConversion:
batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
- assert [message["role"] for message in batch] == ["user", "user", "assistant"]
+ assert [message["role"] for message in batch] == ["user", "assistant", "assistant"]
assert batch[1]["parts"] == [
{
"type": "tool",
@@ -538,6 +539,35 @@ class TestOpenVikingTurnConversion:
assert recall_tool_name not in batch_text
assert "Old OpenViking memory content" not in batch_text
+ def test_messages_to_openviking_batch_preserves_responses_text_parts(self):
+ turn = [
+ {"role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+ {"role": "assistant", "content": [{"type": "output_text", "text": "answer"}]},
+ ]
+
+ batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+ assert batch == [
+ {"role": "user", "parts": [{"type": "text", "text": "hello"}]},
+ {"role": "assistant", "parts": [{"type": "text", "text": "answer"}]},
+ ]
+
+ def test_messages_to_openviking_batch_adds_assistant_peer_id_when_requested(self):
+ turn = [
+ {"role": "user", "content": "hello"},
+ {"role": "assistant", "content": "answer"},
+ ]
+
+ batch = OpenVikingMemoryProvider._messages_to_openviking_batch(
+ turn,
+ assistant_peer_id="hermes",
+ )
+
+ assert batch == [
+ {"role": "user", "parts": [{"type": "text", "text": "hello"}]},
+ {"role": "assistant", "parts": [{"type": "text", "text": "answer"}], "peer_id": "hermes"},
+ ]
+
class TestOpenVikingRead:
def test_overview_read_normalizes_uri_and_unwraps_result(self):
diff --git a/tests/plugins/memory/test_openviking_provider.py b/tests/plugins/memory/test_openviking_provider.py
index 2863566b367..28f2d8e9d46 100644
--- a/tests/plugins/memory/test_openviking_provider.py
+++ b/tests/plugins/memory/test_openviking_provider.py
@@ -2195,6 +2195,78 @@ def test_sync_turn_retries_batch_write_with_fresh_client():
)]
+def test_sync_turn_structured_messages_include_assistant_peer_id():
+ provider = OpenVikingMemoryProvider()
+ provider._client = MagicMock()
+ provider._endpoint = "http://test"
+ provider._api_key = ""
+ provider._account = "acct"
+ provider._user = "usr"
+ provider._agent = "hermes"
+ provider._session_id = "sid-structured"
+
+ captured = []
+
+ class StubClient:
+ def __init__(self, *a, **kw):
+ pass
+
+ def post(self, path, payload=None, **kwargs):
+ captured.append((path, payload))
+ return {}
+
+ import plugins.memory.openviking as _mod
+
+ real_client_cls = _mod._VikingClient
+ _mod._VikingClient = StubClient
+ messages = [
+ {"role": "user", "content": [{"type": "input_text", "text": "u"}]},
+ {
+ "role": "assistant",
+ "content": "Looking.",
+ "tool_calls": [
+ {
+ "id": "call-1",
+ "type": "function",
+ "function": {"name": "shell_command", "arguments": json.dumps({"cmd": "pwd"})},
+ }
+ ],
+ },
+ {"role": "tool", "tool_call_id": "call-1", "name": "shell_command", "content": "ok"},
+ {"role": "assistant", "content": [{"type": "output_text", "text": "a"}]},
+ ]
+ try:
+ provider.sync_turn("u", "a", messages=messages)
+ assert provider._drain_writers("sid-structured", timeout=2.0)
+ finally:
+ _mod._VikingClient = real_client_cls
+
+ assert captured == [(
+ "/api/v1/sessions/sid-structured/messages/batch",
+ {
+ "messages": [
+ {"role": "user", "parts": [{"type": "text", "text": "u"}]},
+ {"role": "assistant", "parts": [{"type": "text", "text": "Looking."}], "peer_id": "hermes"},
+ {
+ "role": "assistant",
+ "parts": [
+ {
+ "type": "tool",
+ "tool_id": "call-1",
+ "tool_name": "shell_command",
+ "tool_input": {"cmd": "pwd"},
+ "tool_output": "ok",
+ "tool_status": "completed",
+ }
+ ],
+ "peer_id": "hermes",
+ },
+ {"role": "assistant", "parts": [{"type": "text", "text": "a"}], "peer_id": "hermes"},
+ ]
+ },
+ )]
+
+
def test_sync_turn_noop_when_session_id_blank():
provider = OpenVikingMemoryProvider()
provider._client = MagicMock()
diff --git a/tests/run_agent/test_codex_app_server_integration.py b/tests/run_agent/test_codex_app_server_integration.py
index 14c058178b9..b0d2ec23861 100644
--- a/tests/run_agent/test_codex_app_server_integration.py
+++ b/tests/run_agent/test_codex_app_server_integration.py
@@ -12,7 +12,7 @@ Verifies that:
from __future__ import annotations
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
import pytest
@@ -148,6 +148,17 @@ class TestRunConversationCodexPath:
and m.get("content") == "echo: hello"]
assert final, f"expected final assistant message in {msgs}"
+ def test_projected_messages_are_synced_to_external_memory(self, fake_session):
+ agent = _make_codex_agent()
+ agent._memory_manager = MagicMock()
+ agent._memory_manager.build_system_prompt.return_value = ""
+
+ with patch.object(agent, "_spawn_background_review", return_value=None):
+ result = agent.run_conversation("hello")
+
+ agent._memory_manager.sync_all.assert_called_once()
+ assert agent._memory_manager.sync_all.call_args.kwargs["messages"] == result["messages"]
+
def test_nudge_counters_tick(self, fake_session):
"""The skill nudge counter must accumulate tool_iterations across
turns. The memory nudge counter is gated on memory being configured
From 15e3b64b7538bb0a38e4bfd91d9c8a4f8110ce8f Mon Sep 17 00:00:00 2001
From: Shannon Sands
Date: Fri, 19 Jun 2026 11:25:05 +1000
Subject: [PATCH 042/470] fix(tui): keep hosted dashboard chat alive on exit
---
.../src/__tests__/createSlashHandler.test.ts | 30 +++++++++++++++++++
ui-tui/src/app/slash/commands/core.ts | 24 ++++++++++++++-
2 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/ui-tui/src/__tests__/createSlashHandler.test.ts b/ui-tui/src/__tests__/createSlashHandler.test.ts
index a671063e5e9..c0247795af3 100644
--- a/ui-tui/src/__tests__/createSlashHandler.test.ts
+++ b/ui-tui/src/__tests__/createSlashHandler.test.ts
@@ -9,6 +9,10 @@ describe('createSlashHandler', () => {
beforeEach(() => {
resetOverlayState()
resetUiState()
+ delete process.env.HERMES_TUI_INLINE
+ delete process.env.HERMES_HOME
+ delete process.env.HERMES_WRITE_SAFE_ROOT
+ delete process.env.HERMES_DISABLE_LAZY_INSTALLS
})
it('opens the unified sessions overlay for /resume', () => {
@@ -68,6 +72,32 @@ describe('createSlashHandler', () => {
expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
})
+ it('keeps hosted dashboard chat alive for /exit', () => {
+ process.env.HERMES_TUI_INLINE = '1'
+ process.env.HERMES_HOME = '/opt/data/profiles/worker'
+ process.env.HERMES_WRITE_SAFE_ROOT = '/opt/data'
+ process.env.HERMES_DISABLE_LAZY_INSTALLS = '1'
+ const ctx = buildCtx()
+
+ expect(createSlashHandler(ctx)('/exit')).toBe(true)
+ expect(ctx.session.die).not.toHaveBeenCalled()
+ expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
+ expect(ctx.transcript.sys).toHaveBeenCalledWith(
+ 'exit is disabled in hosted dashboard chat — use /new to start a fresh session'
+ )
+ })
+
+ it('keeps /quit available outside hosted dashboard chat', () => {
+ process.env.HERMES_TUI_INLINE = '1'
+ process.env.HERMES_HOME = '/Users/example/.hermes'
+ process.env.HERMES_WRITE_SAFE_ROOT = '/Users/example/.hermes'
+ process.env.HERMES_DISABLE_LAZY_INSTALLS = '1'
+ const ctx = buildCtx()
+
+ expect(createSlashHandler(ctx)('/quit')).toBe(true)
+ expect(ctx.session.die).toHaveBeenCalledTimes(1)
+ })
+
it('handles /update locally and exits with code 42 via dieWithCode', () => {
vi.useFakeTimers()
const ctx = buildCtx()
diff --git a/ui-tui/src/app/slash/commands/core.ts b/ui-tui/src/app/slash/commands/core.ts
index 5c021dbcdf9..b5d72cf7712 100644
--- a/ui-tui/src/app/slash/commands/core.ts
+++ b/ui-tui/src/app/slash/commands/core.ts
@@ -76,6 +76,20 @@ const DETAILS_USAGE =
const DETAILS_SECTION_USAGE = 'usage: /details [hidden|collapsed|expanded|reset]'
+const truthyEnv = (v?: string) => /^(?:1|true|yes|on)$/i.test((v ?? '').trim())
+
+const hostedInlineDashboardChat = () => {
+ const hermesHome = (process.env.HERMES_HOME ?? '').trim()
+ const hostedHome = hermesHome === '/opt/data' || hermesHome.startsWith('/opt/data/')
+
+ return (
+ process.env.HERMES_TUI_INLINE === '1' &&
+ hostedHome &&
+ process.env.HERMES_WRITE_SAFE_ROOT === '/opt/data' &&
+ truthyEnv(process.env.HERMES_DISABLE_LAZY_INSTALLS)
+ )
+}
+
export const coreCommands: SlashCommand[] = [
{
help: 'list commands + hotkeys',
@@ -113,7 +127,15 @@ export const coreCommands: SlashCommand[] = [
aliases: ['exit'],
help: 'exit hermes',
name: 'quit',
- run: (_arg, ctx) => ctx.session.die()
+ run: (_arg, ctx) => {
+ if (hostedInlineDashboardChat()) {
+ ctx.transcript.sys('exit is disabled in hosted dashboard chat — use /new to start a fresh session')
+
+ return
+ }
+
+ ctx.session.die()
+ }
},
{
From 3f0e9849e7a2753931ef32c624cae33a7461e653 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Fri, 19 Jun 2026 12:29:19 +0530
Subject: [PATCH 043/470] refactor(tui): reuse DASHBOARD_TUI_MODE for hosted
/exit guard
Follow-up to the salvaged hosted /exit fix. Instead of a separate 4-env-var
fingerprint (HERMES_TUI_INLINE + /opt/data HERMES_HOME + HERMES_WRITE_SAFE_ROOT
+ HERMES_DISABLE_LAZY_INSTALLS), gate /exit and /quit on the existing
DASHBOARD_TUI_MODE flag (HERMES_TUI_DASHBOARD) that the keyboard idle-exit
(useInputHandlers) and SIGINT-ignore (entry.tsx) paths already use. One hosted
detection mechanism instead of two divergent ones.
Extract the refusal text to an exported DASHBOARD_EXIT_DISABLED_MESSAGE so the
test asserts against the same source of truth as production (no change-detector
on the literal). The test mocks only the DASHBOARD_TUI_MODE export via
importActual so the other env exports stay real.
---
.../src/__tests__/createSlashHandler.test.ts | 35 +++++++++++--------
ui-tui/src/app/slash/commands/core.ts | 30 ++++++++--------
2 files changed, 34 insertions(+), 31 deletions(-)
diff --git a/ui-tui/src/__tests__/createSlashHandler.test.ts b/ui-tui/src/__tests__/createSlashHandler.test.ts
index c0247795af3..415dd4c0f3c 100644
--- a/ui-tui/src/__tests__/createSlashHandler.test.ts
+++ b/ui-tui/src/__tests__/createSlashHandler.test.ts
@@ -2,17 +2,30 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'
import { createSlashHandler } from '../app/createSlashHandler.js'
import { getOverlayState, resetOverlayState } from '../app/overlayStore.js'
+import { DASHBOARD_EXIT_DISABLED_MESSAGE } from '../app/slash/commands/core.js'
import { getUiState, patchUiState, resetUiState } from '../app/uiStore.js'
import { TUI_SESSION_MODEL_FLAG } from '../domain/slash.js'
+// DASHBOARD_TUI_MODE resolves once at module load from HERMES_TUI_DASHBOARD,
+// so toggling process.env in a test body can't move it. Mock just that one
+// export (everything else stays real) and flip the holder per test.
+const envState = { dashboardTuiMode: false }
+vi.mock('../config/env.js', async importActual => {
+ const actual = await importActual()
+
+ return {
+ ...actual,
+ get DASHBOARD_TUI_MODE() {
+ return envState.dashboardTuiMode
+ }
+ }
+})
+
describe('createSlashHandler', () => {
beforeEach(() => {
resetOverlayState()
resetUiState()
- delete process.env.HERMES_TUI_INLINE
- delete process.env.HERMES_HOME
- delete process.env.HERMES_WRITE_SAFE_ROOT
- delete process.env.HERMES_DISABLE_LAZY_INSTALLS
+ envState.dashboardTuiMode = false
})
it('opens the unified sessions overlay for /resume', () => {
@@ -73,25 +86,17 @@ describe('createSlashHandler', () => {
})
it('keeps hosted dashboard chat alive for /exit', () => {
- process.env.HERMES_TUI_INLINE = '1'
- process.env.HERMES_HOME = '/opt/data/profiles/worker'
- process.env.HERMES_WRITE_SAFE_ROOT = '/opt/data'
- process.env.HERMES_DISABLE_LAZY_INSTALLS = '1'
+ envState.dashboardTuiMode = true
const ctx = buildCtx()
expect(createSlashHandler(ctx)('/exit')).toBe(true)
expect(ctx.session.die).not.toHaveBeenCalled()
expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
- expect(ctx.transcript.sys).toHaveBeenCalledWith(
- 'exit is disabled in hosted dashboard chat — use /new to start a fresh session'
- )
+ expect(ctx.transcript.sys).toHaveBeenCalledWith(DASHBOARD_EXIT_DISABLED_MESSAGE)
})
it('keeps /quit available outside hosted dashboard chat', () => {
- process.env.HERMES_TUI_INLINE = '1'
- process.env.HERMES_HOME = '/Users/example/.hermes'
- process.env.HERMES_WRITE_SAFE_ROOT = '/Users/example/.hermes'
- process.env.HERMES_DISABLE_LAZY_INSTALLS = '1'
+ envState.dashboardTuiMode = false
const ctx = buildCtx()
expect(createSlashHandler(ctx)('/quit')).toBe(true)
diff --git a/ui-tui/src/app/slash/commands/core.ts b/ui-tui/src/app/slash/commands/core.ts
index b5d72cf7712..7c5a79505ad 100644
--- a/ui-tui/src/app/slash/commands/core.ts
+++ b/ui-tui/src/app/slash/commands/core.ts
@@ -1,6 +1,6 @@
import { forceRedraw, type MouseTrackingMode } from '@hermes/ink'
-import { NO_CONFIRM_DESTRUCTIVE } from '../../../config/env.js'
+import { DASHBOARD_TUI_MODE, NO_CONFIRM_DESTRUCTIVE } from '../../../config/env.js'
import { dailyFortune, randomFortune } from '../../../content/fortunes.js'
import { HOTKEYS } from '../../../content/hotkeys.js'
import { isSectionName, nextDetailsMode, parseDetailsMode, SECTION_NAMES } from '../../../domain/details.js'
@@ -76,19 +76,10 @@ const DETAILS_USAGE =
const DETAILS_SECTION_USAGE = 'usage: /details [hidden|collapsed|expanded|reset]'
-const truthyEnv = (v?: string) => /^(?:1|true|yes|on)$/i.test((v ?? '').trim())
-
-const hostedInlineDashboardChat = () => {
- const hermesHome = (process.env.HERMES_HOME ?? '').trim()
- const hostedHome = hermesHome === '/opt/data' || hermesHome.startsWith('/opt/data/')
-
- return (
- process.env.HERMES_TUI_INLINE === '1' &&
- hostedHome &&
- process.env.HERMES_WRITE_SAFE_ROOT === '/opt/data' &&
- truthyEnv(process.env.HERMES_DISABLE_LAZY_INSTALLS)
- )
-}
+// Shown when /exit or /quit is refused in the hosted dashboard chat. Kept as a
+// constant so the test asserts against the same source of truth as production.
+export const DASHBOARD_EXIT_DISABLED_MESSAGE =
+ 'exit is disabled in hosted dashboard chat — use /new to start a fresh session'
export const coreCommands: SlashCommand[] = [
{
@@ -128,8 +119,15 @@ export const coreCommands: SlashCommand[] = [
help: 'exit hermes',
name: 'quit',
run: (_arg, ctx) => {
- if (hostedInlineDashboardChat()) {
- ctx.transcript.sys('exit is disabled in hosted dashboard chat — use /new to start a fresh session')
+ // In the hosted dashboard chat there is no in-page restart path after
+ // the PTY child exits, so quitting bricks the tab until a refresh. The
+ // keyboard idle-exit (Ctrl+C / Ctrl+D) and SIGINT handling already refuse
+ // to die in this mode (see useInputHandlers + entry.tsx); gate /exit and
+ // /quit on the same DASHBOARD_TUI_MODE flag. Unlike the keyboard path
+ // (which auto-starts a fresh chat), the explicit quit command refuses and
+ // instructs the user to run /new themselves.
+ if (DASHBOARD_TUI_MODE) {
+ ctx.transcript.sys(DASHBOARD_EXIT_DISABLED_MESSAGE)
return
}
From 5a856bdfa355bb45330a23ecb63abdf9b810e865 Mon Sep 17 00:00:00 2001
From: Hao Zhe
Date: Fri, 19 Jun 2026 15:38:25 +0800
Subject: [PATCH 044/470] chore(release): add OpenViking contributor
attribution
---
scripts/release.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/scripts/release.py b/scripts/release.py
index 6c5d33ec3a1..4e5f8844439 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1577,6 +1577,7 @@ AUTHOR_MAP = {
"sunsky.lau@gmail.com": "liuhao1024", # PR #45494 salvage (claim session slot before auto-resume task; #45456)
"andrewdmwalker@gmail.com": "capt-marbles", # PR #38440 salvage (resolve xAI OAuth credentials across profiles; #43589)
"infinitycrew39@gmail.com": "infinitycrew39", # PR #47945 salvage (scope langfuse trace state by turn/request ids; #48292)
+ "eurekaxun@163.com": "huangxun375-stack", # PR #37251 / #48894 structured OpenViking sync
}
From 9362ce2575e00f5a795285b74e79d54c02e1326c Mon Sep 17 00:00:00 2001
From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com>
Date: Fri, 19 Jun 2026 13:32:31 +0530
Subject: [PATCH 045/470] feat(skills): add html-artifact skill, fold in sketch
+ architecture-diagram + concept-diagrams (#48899)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* feat(skills): add html-artifact skill, fold in sketch + architecture-diagram + concept-diagrams
Adds a unified `html-artifact` creative skill that produces self-contained,
single-file HTML artifacts — concept explainers, implementation plans,
status/incident reports, code-review walkthroughs, technical + educational
SVG diagrams, multi-variant design comparisons, and throwaway editors that
export their state back to the clipboard. Grounded in Anthropic's
html-effectiveness gallery (MIT); the house style (token block, serif/sans/
mono split, hand-rolled diffs, inline-SVG diagrams, graceful degradation) is
distilled from reading all 20 reference files.
Supersedes and removes three overlapping skills, folding their unique value in:
- sketch -> the fidelity dial (throwaway vs presentation) + the
multi-variant comparison layouts + the browser-vision
verify loop (references/fidelity-and-verify.md)
- architecture-diagram-> the dark "infra" token variant + double-rect masking +
semantic component palette (references/dark-tech.md,
templates/diagram.html infra mode)
- concept-diagrams -> the 9-ramp educational color system + the concept
archetype library (references/concept-archetypes.md,
the light design system in templates/diagram.html)
Structure:
- SKILL.md (description exactly 60 chars), 6 references, 3 templates
- templates verified by headless-Chrome render + vision inspection
- editor export logic (file://-safe clipboard, Promise-normalized) verified in node
Cross-references updated in claude-design (new disambiguation table row drawing
the design-taste vs information-artifact boundary), design-md, pretext, spike,
and kanban-video-orchestrator. Website skill docs + catalogs regenerated;
stale EN/zh-Hans per-skill pages pruned and i18n cross-refs fixed.
Not folded (intentionally orthogonal): excalidraw (.excalidraw JSON), p5js
(generative canvas), claude-design / popular-web-designs / design-md (visual
design taste / brand vocab / token spec).
* feat(skills): ship html-effectiveness gallery as fetched reference examples
Add scripts/fetch-examples.sh (idempotent clone/pull of Anthropic's MIT
html-effectiveness gallery) + references/examples.md mapping each of the 20
example files to a mode so the agent reads the right worked example. The clone
lands in references/examples/ and is gitignored (it's a 384KB upstream repo,
not vendored). SKILL.md's workflow + reference list now point at it; the workflow
falls back to the distilled pattern references when offline.
* feat(skills): make reading a gallery example a required authoring step
Reading the matching html-effectiveness example is now workflow step 2 (was an
optional aside in step 3): fetch the gallery, read_file the file for your mode,
mirror its structure. Models skip optional steps; the examples are the ground
truth, so consulting one is mandatory. Added an 'Example' column to the
mode->build quick-reference table and a 'don't skip the example' pitfall.
Also dogfooded the skill: read 03-code-review-pr.html and 13-flowchart-diagram.html
raw and reconciled the distilled references against source — aligned diff-row tint
opacity to the source's 0.15 (was 0.18) and added the .ctx/.hunk rows in
house-style.md + base.html so they match 03-code-review-pr.html verbatim.
* docs(skills): explain the consolidation + bundled-vs-optional rationale
The supersession note only stated *what* was folded, not *why* the prune is
sound. Expand SKILL.md's intro into a 'Why this skill exists' section: the three
former skills emitted the same artifact and overlapped, so consolidating removes
which-one-do-I-load ambiguity; and the optional->bundled promotion of
concept-diagrams is footprint-safe because this skill has zero deps (only cost is
the 60-char description; everything else is progressive-disclosure). States the
bundling dividing line explicitly: zero install cost + broadly useful gets
bundled, real install cost (hyperframes: Node+FFmpeg+Chromium) stays optional.
Regenerated website per-skill page to match.
---
.../creative/concept-diagrams/SKILL.md | 362 -----------------
.../apartment-floor-plan-conversion.md | 244 -----------
.../examples/automated-password-reset-flow.md | 276 -------------
.../autonomous-llm-research-agent-flow.md | 240 -----------
.../banana-journey-tree-to-smoothie.md | 161 --------
.../examples/commercial-aircraft-structure.md | 209 ----------
.../examples/cpu-ooo-microarchitecture.md | 236 -----------
.../examples/electricity-grid-flow.md | 182 ---------
.../feature-film-production-pipeline.md | 172 --------
.../hospital-emergency-department-flow.md | 165 --------
.../ml-benchmark-grouped-bar-chart.md | 114 ------
.../examples/place-order-uml-sequence.md | 325 ---------------
.../examples/smart-city-infrastructure.md | 173 --------
.../examples/smartphone-layer-anatomy.md | 154 -------
.../examples/sn2-reaction-mechanism.md | 247 ------------
.../examples/wind-turbine-structure.md | 338 ----------------
.../references/dashboard-patterns.md | 43 --
.../references/infrastructure-patterns.md | 144 -------
.../references/physical-shape-cookbook.md | 42 --
.../concept-diagrams/templates/template.html | 174 --------
.../kanban-video-orchestrator/SKILL.md | 2 +-
.../references/intake.md | 3 +-
.../references/role-archetypes.md | 5 +-
.../references/tool-matrix.md | 4 +-
skills/creative/architecture-diagram/SKILL.md | 148 -------
.../templates/template.html | 319 ---------------
skills/creative/claude-design/SKILL.md | 12 +-
skills/creative/design-md/SKILL.md | 2 +-
skills/creative/html-artifact/SKILL.md | 184 +++++++++
.../html-artifact/references/.gitignore | 3 +
.../references/concept-archetypes.md | 94 +++++
.../html-artifact/references/dark-tech.md | 92 +++++
.../html-artifact/references/examples.md | 64 +++
.../references/fidelity-and-verify.md | 78 ++++
.../html-artifact/references/house-style.md | 179 +++++++++
.../html-artifact/references/svg-diagrams.md | 123 ++++++
.../references/throwaway-editors.md | 114 ++++++
.../html-artifact/scripts/fetch-examples.sh | 43 ++
.../html-artifact/templates/base.html | 104 +++++
.../html-artifact/templates/diagram.html | 127 ++++++
.../html-artifact/templates/editor.html | 120 ++++++
skills/creative/pretext/SKILL.md | 2 +-
skills/creative/sketch/SKILL.md | 218 ----------
skills/software-development/spike/SKILL.md | 2 +-
.../docs/reference/optional-skills-catalog.md | 1 -
website/docs/reference/skills-catalog.md | 3 +-
.../autonomous-ai-agents-hermes-agent.md | 4 +-
.../creative/creative-architecture-diagram.md | 165 --------
.../creative/creative-claude-design.md | 12 +-
.../bundled/creative/creative-design-md.md | 2 +-
.../creative/creative-html-artifact.md | 202 ++++++++++
.../bundled/creative/creative-pretext.md | 2 +-
.../bundled/creative/creative-sketch.md | 238 -----------
.../creative/creative-touchdesigner-mcp.md | 2 +-
.../skills/bundled/email/email-himalaya.md | 5 +
.../bundled/github/github-github-auth.md | 4 +-
.../github/github-github-code-review.md | 4 +-
.../bundled/github/github-github-issues.md | 4 +-
.../github/github-github-pr-workflow.md | 4 +-
.../github/github-github-repo-management.md | 4 +-
.../skills/bundled/media/media-gif-search.md | 2 +-
.../note-taking/note-taking-obsidian.md | 2 +-
.../productivity/productivity-airtable.md | 4 +-
.../productivity/productivity-notion.md | 4 +-
.../productivity-teams-meeting-pipeline.md | 2 +-
.../bundled/research/research-llm-wiki.md | 2 +-
.../research-research-paper-writing.md | 2 +-
...tware-development-node-inspect-debugger.md | 2 +-
.../software-development-python-debugpy.md | 2 +-
.../software-development-spike.md | 2 +-
.../autonomous-ai-agents-honcho.md | 4 +-
.../blockchain/blockchain-hyperliquid.md | 4 +-
.../creative/creative-concept-diagrams.md | 379 ------------------
.../creative-kanban-video-orchestrator.md | 4 +-
.../optional/devops/devops-pinggy-tunnel.md | 2 +-
.../skills/optional/devops/devops-watchers.md | 2 +-
.../skills/optional/mcp/mcp-fastmcp.md | 2 +-
.../payments/payments-stripe-projects.md | 2 +-
.../productivity/productivity-canvas.md | 2 +-
.../productivity/productivity-shopify.md | 2 +-
.../productivity/productivity-siyuan.md | 2 +-
.../productivity/productivity-telephony.md | 8 +-
.../research/research-gitnexus-explorer.md | 2 +-
.../skills/optional/research/research-qmd.md | 2 +-
.../optional/security/security-1password.md | 2 +-
.../optional/security/security-godmode.md | 2 +-
...software-development-rest-graphql-debug.md | 2 +-
.../reference/optional-skills-catalog.md | 1 -
.../current/reference/skills-catalog.md | 2 -
.../creative/creative-architecture-diagram.md | 165 --------
.../creative/creative-claude-design.md | 2 +-
.../bundled/creative/creative-design-md.md | 2 +-
.../bundled/creative/creative-pretext.md | 2 +-
.../bundled/creative/creative-sketch.md | 238 -----------
.../software-development-spike.md | 2 +-
.../creative/creative-concept-diagrams.md | 379 ------------------
.../creative-kanban-video-orchestrator.md | 2 +-
website/sidebars.ts | 5 +-
98 files changed, 1610 insertions(+), 6336 deletions(-)
delete mode 100644 optional-skills/creative/concept-diagrams/SKILL.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/apartment-floor-plan-conversion.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/automated-password-reset-flow.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/autonomous-llm-research-agent-flow.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/banana-journey-tree-to-smoothie.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/commercial-aircraft-structure.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/cpu-ooo-microarchitecture.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/electricity-grid-flow.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/feature-film-production-pipeline.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/hospital-emergency-department-flow.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/ml-benchmark-grouped-bar-chart.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/place-order-uml-sequence.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/smart-city-infrastructure.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/smartphone-layer-anatomy.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/sn2-reaction-mechanism.md
delete mode 100644 optional-skills/creative/concept-diagrams/examples/wind-turbine-structure.md
delete mode 100644 optional-skills/creative/concept-diagrams/references/dashboard-patterns.md
delete mode 100644 optional-skills/creative/concept-diagrams/references/infrastructure-patterns.md
delete mode 100644 optional-skills/creative/concept-diagrams/references/physical-shape-cookbook.md
delete mode 100644 optional-skills/creative/concept-diagrams/templates/template.html
delete mode 100644 skills/creative/architecture-diagram/SKILL.md
delete mode 100644 skills/creative/architecture-diagram/templates/template.html
create mode 100644 skills/creative/html-artifact/SKILL.md
create mode 100644 skills/creative/html-artifact/references/.gitignore
create mode 100644 skills/creative/html-artifact/references/concept-archetypes.md
create mode 100644 skills/creative/html-artifact/references/dark-tech.md
create mode 100644 skills/creative/html-artifact/references/examples.md
create mode 100644 skills/creative/html-artifact/references/fidelity-and-verify.md
create mode 100644 skills/creative/html-artifact/references/house-style.md
create mode 100644 skills/creative/html-artifact/references/svg-diagrams.md
create mode 100644 skills/creative/html-artifact/references/throwaway-editors.md
create mode 100755 skills/creative/html-artifact/scripts/fetch-examples.sh
create mode 100644 skills/creative/html-artifact/templates/base.html
create mode 100644 skills/creative/html-artifact/templates/diagram.html
create mode 100644 skills/creative/html-artifact/templates/editor.html
delete mode 100644 skills/creative/sketch/SKILL.md
delete mode 100644 website/docs/user-guide/skills/bundled/creative/creative-architecture-diagram.md
create mode 100644 website/docs/user-guide/skills/bundled/creative/creative-html-artifact.md
delete mode 100644 website/docs/user-guide/skills/bundled/creative/creative-sketch.md
delete mode 100644 website/docs/user-guide/skills/optional/creative/creative-concept-diagrams.md
delete mode 100644 website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/skills/bundled/creative/creative-architecture-diagram.md
delete mode 100644 website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/skills/bundled/creative/creative-sketch.md
delete mode 100644 website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/skills/optional/creative/creative-concept-diagrams.md
diff --git a/optional-skills/creative/concept-diagrams/SKILL.md b/optional-skills/creative/concept-diagrams/SKILL.md
deleted file mode 100644
index 6017d4fd121..00000000000
--- a/optional-skills/creative/concept-diagrams/SKILL.md
+++ /dev/null
@@ -1,362 +0,0 @@
----
-name: concept-diagrams
-description: Generate flat, minimal light/dark-aware SVG diagrams as standalone HTML files, using a unified educational visual language with 9 semantic color ramps, sentence-case typography, and automatic dark mode. Best suited for educational and non-software visuals — physics setups, chemistry mechanisms, math curves, physical objects (aircraft, turbines, smartphones, mechanical watches), anatomy, floor plans, cross-sections, narrative journeys (lifecycle of X, process of Y), hub-spoke system integrations (smart city, IoT), and exploded layer views. If a more specialized skill exists for the subject (dedicated software/cloud architecture, hand-drawn sketches, animated explainers, etc.), prefer that — otherwise this skill can also serve as a general-purpose SVG diagram fallback with a clean educational look. Ships with 15 example diagrams.
-version: 0.1.0
-author: v1k22 (original PR), ported into hermes-agent
-license: MIT
-dependencies: []
-platforms: [linux, macos, windows]
-metadata:
- hermes:
- tags: [diagrams, svg, visualization, education, physics, chemistry, engineering]
- related_skills: [architecture-diagram, excalidraw, generative-widgets]
----
-
-# Concept Diagrams
-
-Generate production-quality SVG diagrams with a unified flat, minimal design system. Output is a single self-contained HTML file that renders identically in any modern browser, with automatic light/dark mode.
-
-## Scope
-
-**Best suited for:**
-- Physics setups, chemistry mechanisms, math curves, biology
-- Physical objects (aircraft, turbines, smartphones, mechanical watches, cells)
-- Anatomy, cross-sections, exploded layer views
-- Floor plans, architectural conversions
-- Narrative journeys (lifecycle of X, process of Y)
-- Hub-spoke system integrations (smart city, IoT networks, electricity grids)
-- Educational / textbook-style visuals in any domain
-- Quantitative charts (grouped bars, energy profiles)
-
-**Look elsewhere first for:**
-- Dedicated software / cloud infrastructure architecture with a dark tech aesthetic (consider `architecture-diagram` if available)
-- Hand-drawn whiteboard sketches (consider `excalidraw` if available)
-- Animated explainers or video output (consider an animation skill)
-
-If a more specialized skill is available for the subject, prefer that. If none fits, this skill can serve as a general-purpose SVG diagram fallback — the output will carry the clean educational aesthetic described below, which is a reasonable default for almost any subject.
-
-## Workflow
-
-1. Decide on the diagram type (see Diagram Types below).
-2. Lay out components using the Design System rules.
-3. Write the full HTML page using `templates/template.html` as the wrapper — paste your SVG where the template says ``.
-4. Save as a standalone `.html` file (for example `~/my-diagram.html` or `./my-diagram.html`).
-5. User opens it directly in a browser — no server, no dependencies.
-
-Optional: if the user wants a browsable gallery of multiple diagrams, see "Local Preview Server" at the bottom.
-
-Load the HTML template:
-```
-skill_view(name="concept-diagrams", file_path="templates/template.html")
-```
-
-The template embeds the full CSS design system (`c-*` color classes, text classes, light/dark variables, arrow marker styles). The SVG you generate relies on these classes being present on the hosting page.
-
----
-
-## Design System
-
-### Philosophy
-
-- **Flat**: no gradients, drop shadows, blur, glow, or neon effects.
-- **Minimal**: show the essential. No decorative icons inside boxes.
-- **Consistent**: same colors, spacing, typography, and stroke widths across every diagram.
-- **Dark-mode ready**: all colors auto-adapt via CSS classes — no per-mode SVG.
-
-### Color Palette
-
-9 color ramps, each with 7 stops. Put the class name on a `<g>` or shape element; the template CSS handles both modes.
-
-| Class | 50 (lightest) | 100 | 200 | 400 | 600 | 800 | 900 (darkest) |
-|------------|---------------|---------|---------|---------|---------|---------|---------------|
-| `c-purple` | #EEEDFE | #CECBF6 | #AFA9EC | #7F77DD | #534AB7 | #3C3489 | #26215C |
-| `c-teal` | #E1F5EE | #9FE1CB | #5DCAA5 | #1D9E75 | #0F6E56 | #085041 | #04342C |
-| `c-coral` | #FAECE7 | #F5C4B3 | #F0997B | #D85A30 | #993C1D | #712B13 | #4A1B0C |
-| `c-pink` | #FBEAF0 | #F4C0D1 | #ED93B1 | #D4537E | #993556 | #72243E | #4B1528 |
-| `c-gray` | #F1EFE8 | #D3D1C7 | #B4B2A9 | #888780 | #5F5E5A | #444441 | #2C2C2A |
-| `c-blue` | #E6F1FB | #B5D4F4 | #85B7EB | #378ADD | #185FA5 | #0C447C | #042C53 |
-| `c-green` | #EAF3DE | #C0DD97 | #97C459 | #639922 | #3B6D11 | #27500A | #173404 |
-| `c-amber` | #FAEEDA | #FAC775 | #EF9F27 | #BA7517 | #854F0B | #633806 | #412402 |
-| `c-red` | #FCEBEB | #F7C1C1 | #F09595 | #E24B4A | #A32D2D | #791F1F | #501313 |
-
-#### Color Assignment Rules
-
-Color encodes **meaning**, not sequence. Never cycle through colors like a rainbow.
-
-- Group nodes by **category** — all nodes of the same type share one color.
-- Use `c-gray` for neutral/structural nodes (start, end, generic steps, users).
-- Use **2-3 colors per diagram**, not 6+.
-- Prefer `c-purple`, `c-teal`, `c-coral`, `c-pink` for general categories.
-- Reserve `c-blue`, `c-green`, `c-amber`, `c-red` for semantic meaning (info, success, warning, error).
-
-Light/dark stop mapping (handled by the template CSS — just use the class):
-- Light mode: 50 fill + 600 stroke + 800 title / 600 subtitle
-- Dark mode: 800 fill + 200 stroke + 100 title / 200 subtitle
-
-### Typography
-
-Only two font sizes. No exceptions.
-
-| Class | Size | Weight | Use |
-|-------|------|--------|-----|
-| `th` | 14px | 500 | Node titles, region labels |
-| `ts` | 12px | 400 | Subtitles, descriptions, arrow labels |
-| `t` | 14px | 400 | General text |
-
-- **Sentence case always.** Never Title Case, never ALL CAPS.
-- Every `<text>` MUST carry a class (`t`, `ts`, or `th`). No unclassed text.
-- `dominant-baseline="central"` on all text inside boxes.
-- `text-anchor="middle"` for centered text in boxes.
-
-**Width estimation (approx):**
-- 14px weight 500: ~8px per character
-- 12px weight 400: ~6.5px per character
-- Always verify: `box_width >= (char_count × px_per_char) + 48` (24px padding each side)
-
-### Spacing & Layout
-
-- **ViewBox**: `viewBox="0 0 680 H"` where H = content height + 40px buffer.
-- **Safe area**: x=40 to x=640, y=40 to y=(H-40).
-- **Between boxes**: 60px minimum gap.
-- **Inside boxes**: 24px horizontal padding, 12px vertical padding.
-- **Arrowhead gap**: 10px between arrowhead and box edge.
-- **Single-line box**: 44px height.
-- **Two-line box**: 56px height, 18px between title and subtitle baselines.
-- **Container padding**: 20px minimum inside every container.
-- **Max nesting**: 2-3 levels deep. Deeper gets unreadable at 680px width.
-
-### Stroke & Shape
-
-- **Stroke width**: 0.5px on all node borders. Not 1px, not 2px.
-- **Rect rounding**: `rx="8"` for nodes, `rx="12"` for inner containers, `rx="16"` to `rx="20"` for outer containers.
-- **Connector paths**: MUST have `fill="none"`. SVG defaults to `fill: black` otherwise.
-
-### Arrow Marker
-
-Include this `<defs>` block at the start of **every** SVG:
-
-```xml
-
-
-
-
-
-```
-
-Use `marker-end="url(#arrow)"` on lines. The arrowhead inherits the line color via `context-stroke`.
-
-### CSS Classes (Provided by the Template)
-
-The template page provides:
-
-- Text: `.t`, `.ts`, `.th`
-- Neutral: `.box`, `.arr`, `.leader`, `.node`
-- Color ramps: `.c-purple`, `.c-teal`, `.c-coral`, `.c-pink`, `.c-gray`, `.c-blue`, `.c-green`, `.c-amber`, `.c-red` (all with automatic light/dark mode)
-
-You do **not** need to redefine these — just apply them in your SVG. The template file contains the full CSS definitions.
-
----
-
-## SVG Boilerplate
-
-Every SVG inside the template page starts with this exact structure:
-
-```xml
-
-```
-
-Replace `{HEIGHT}` with the actual computed height (last element bottom + 40px).
-
-### Node Patterns
-
-**Single-line node (44px):**
-```xml
-
-
- Service name
-
-```
-
-**Two-line node (56px):**
-```xml
-
-
- Service name
- Short description
-
-```
-
-**Connector (no label):**
-```xml
-
-```
-
-**Container (dashed or solid):**
-```xml
-
-
- Container label
- Subtitle info
-
-```
-
----
-
-## Diagram Types
-
-Choose the layout that fits the subject:
-
-1. **Flowchart** — CI/CD pipelines, request lifecycles, approval workflows, data processing. Single-direction flow (top-down or left-right). Max 4-5 nodes per row.
-2. **Structural / Containment** — Cloud infrastructure nesting, system architecture with layers. Large outer containers with inner regions. Dashed rects for logical groupings.
-3. **API / Endpoint Map** — REST routes, GraphQL schemas. Tree from root, branching to resource groups, each containing endpoint nodes.
-4. **Microservice Topology** — Service mesh, event-driven systems. Services as nodes, arrows for communication patterns, message queues between.
-5. **Data Flow** — ETL pipelines, streaming architectures. Left-to-right flow from sources through processing to sinks.
-6. **Physical / Structural** — Vehicles, buildings, hardware, anatomy. Use shapes that match the physical form — `<path>` for curved bodies, `<polygon>` for tapered shapes, `<ellipse>`/`<circle>` for cylindrical parts, nested `<rect>` for compartments. See `references/physical-shape-cookbook.md`.
-7. **Infrastructure / Systems Integration** — Smart cities, IoT networks, multi-domain systems. Hub-spoke layout with central platform connecting subsystems. Semantic line styles (`.data-line`, `.power-line`, `.water-pipe`, `.road`). See `references/infrastructure-patterns.md`.
-8. **UI / Dashboard Mockups** — Admin panels, monitoring dashboards. Screen frame with nested chart/gauge/indicator elements. See `references/dashboard-patterns.md`.
-
-For physical, infrastructure, and dashboard diagrams, load the matching reference file before generating — each one provides ready-made CSS classes and shape primitives.
-
----
-
-## Validation Checklist
-
-Before finalizing any SVG, verify ALL of the following:
-
-1. Every `<text>` has class `t`, `ts`, or `th`.
-2. Every `<text>` inside a box has `dominant-baseline="central"`.
-3. Every connector `<path>` or `<line>` used as arrow has `fill="none"`.
-4. No arrow line crosses through an unrelated box.
-5. `box_width >= (longest_label_chars × 8) + 48` for 14px text.
-6. `box_width >= (longest_label_chars × 6.5) + 48` for 12px text.
-7. ViewBox height = bottom-most element + 40px.
-8. All content stays within x=40 to x=640.
-9. Color classes (`c-*`) are on `<g>` or shape elements, never on `<path>` connectors.
-10. Arrow `<defs>` block is present.
-11. No gradients, shadows, blur, or glow effects.
-12. Stroke width is 0.5px on all node borders.
-
----
-
-## Output & Preview
-
-### Default: standalone HTML file
-
-Write a single `.html` file the user can open directly. No server, no dependencies, works offline. Pattern:
-
-```python
-# 1. Load the template
-template = skill_view("concept-diagrams", "templates/template.html")
-
-# 2. Fill in title, subtitle, and paste your SVG
-html = template.replace(
- "", "SN2 reaction mechanism"
-).replace(
- "", "Bimolecular nucleophilic substitution"
-).replace(
- "", svg_content
-)
-
-# 3. Write to a user-chosen path (or ./ by default)
-write_file("./sn2-mechanism.html", html)
-```
-
-Tell the user how to open it:
-
-```
-# macOS
-open ./sn2-mechanism.html
-# Linux
-xdg-open ./sn2-mechanism.html
-```
-
-### Optional: local preview server (multi-diagram gallery)
-
-Only use this when the user explicitly wants a browsable gallery of multiple diagrams.
-
-**Rules:**
-- Bind to `127.0.0.1` only. Never `0.0.0.0`. Exposing diagrams on all network interfaces is a security hazard on shared networks.
-- Pick a free port (do NOT hard-code one) and tell the user the chosen URL.
-- The server is optional and opt-in — prefer the standalone HTML file first.
-
-Recommended pattern (lets the OS pick a free ephemeral port):
-
-```bash
-# Put each diagram in its own folder under .diagrams/
-mkdir -p .diagrams/sn2-mechanism
-# ...write .diagrams/sn2-mechanism/index.html...
-
-# Serve on loopback only, free port
-cd .diagrams && python3 -c "
-import http.server, socketserver
-with socketserver.TCPServer(('127.0.0.1', 0), http.server.SimpleHTTPRequestHandler) as s:
- print(f'Serving at http://127.0.0.1:{s.server_address[1]}/')
- s.serve_forever()
-" &
-```
-
-If the user insists on a fixed port, use `127.0.0.1:<port>` — still never `0.0.0.0`. Document how to stop the server (`kill %1` or `pkill -f "http.server"`).
-
----
-
-## Examples Reference
-
-The `examples/` directory ships 15 complete, tested diagrams. Browse them for working patterns before writing a new diagram of a similar type:
-
-| File | Type | Demonstrates |
-|------|------|--------------|
-| `hospital-emergency-department-flow.md` | Flowchart | Priority routing with semantic colors |
-| `feature-film-production-pipeline.md` | Flowchart | Phased workflow, horizontal sub-flows |
-| `automated-password-reset-flow.md` | Flowchart | Auth flow with error branches |
-| `autonomous-llm-research-agent-flow.md` | Flowchart | Loop-back arrows, decision branches |
-| `place-order-uml-sequence.md` | Sequence | UML sequence diagram style |
-| `commercial-aircraft-structure.md` | Physical | Paths, polygons, ellipses for realistic shapes |
-| `wind-turbine-structure.md` | Physical cross-section | Underground/above-ground separation, color coding |
-| `smartphone-layer-anatomy.md` | Exploded view | Alternating left/right labels, layered components |
-| `apartment-floor-plan-conversion.md` | Floor plan | Walls, doors, proposed changes in dotted red |
-| `banana-journey-tree-to-smoothie.md` | Narrative journey | Winding path, progressive state changes |
-| `cpu-ooo-microarchitecture.md` | Hardware pipeline | Fan-out, memory hierarchy sidebar |
-| `sn2-reaction-mechanism.md` | Chemistry | Molecules, curved arrows, energy profile |
-| `smart-city-infrastructure.md` | Hub-spoke | Semantic line styles per system |
-| `electricity-grid-flow.md` | Multi-stage flow | Voltage hierarchy, flow markers |
-| `ml-benchmark-grouped-bar-chart.md` | Chart | Grouped bars, dual axis |
-
-Load any example with:
-```
-skill_view(name="concept-diagrams", file_path="examples/<filename>")
-```
-
----
-
-## Quick Reference: What to Use When
-
-| User says | Diagram type | Suggested colors |
-|-----------|--------------|------------------|
-| "show the pipeline" | Flowchart | gray start/end, purple steps, red errors, teal deploy |
-| "draw the data flow" | Data pipeline (left-right) | gray sources, purple processing, teal sinks |
-| "visualize the system" | Structural (containment) | purple container, teal services, coral data |
-| "map the endpoints" | API tree | purple root, one ramp per resource group |
-| "show the services" | Microservice topology | gray ingress, teal services, purple bus, coral workers |
-| "draw the aircraft/vehicle" | Physical | paths, polygons, ellipses for realistic shapes |
-| "smart city / IoT" | Hub-spoke integration | semantic line styles per subsystem |
-| "show the dashboard" | UI mockup | dark screen, chart colors: teal, purple, coral for alerts |
-| "power grid / electricity" | Multi-stage flow | voltage hierarchy (HV/MV/LV line weights) |
-| "wind turbine / turbine" | Physical cross-section | foundation + tower cutaway + nacelle color-coded |
-| "journey of X / lifecycle" | Narrative journey | winding path, progressive state changes |
-| "layers of X / exploded" | Exploded layer view | vertical stack, alternating labels |
-| "CPU / pipeline" | Hardware pipeline | vertical stages, fan-out to execution ports |
-| "floor plan / apartment" | Floor plan | walls, doors, proposed changes in dotted red |
-| "reaction mechanism" | Chemistry | atoms, bonds, curved arrows, transition state, energy profile |
diff --git a/optional-skills/creative/concept-diagrams/examples/apartment-floor-plan-conversion.md b/optional-skills/creative/concept-diagrams/examples/apartment-floor-plan-conversion.md
deleted file mode 100644
index 7c11d3401e5..00000000000
--- a/optional-skills/creative/concept-diagrams/examples/apartment-floor-plan-conversion.md
+++ /dev/null
@@ -1,244 +0,0 @@
-# Apartment Floor Plan: 3 BHK to 4 BHK Conversion
-
-An architectural floor plan showing a 1,500 sq ft apartment with proposed modifications to convert from 3 BHK to 4 BHK. Demonstrates architectural drawing conventions, room layouts, proposed changes with dotted lines, and area comparison tables.
-
-## Key Patterns Used
-
-- **Architectural floor plan**: Top-down view with walls, doors, windows
-- **Proposed modifications**: Dotted red lines for new walls
-- **Room color coding**: Light fills to distinguish room types
-- **Circulation paths**: Arrows showing new access routes
-- **Data table**: Before/after area comparison with highlighting
-- **Architectural symbols**: North arrow, scale bar, door swings
-
-## Diagram Type
-
-This is an **architectural floor plan** with:
-- **Plan view**: Top-down orthographic projection
-- **Overlay technique**: Existing structure + proposed changes
-- **Quantitative data**: Area measurements and comparison table
-
-## Architectural Drawing Elements
-
-### Wall Styles
-
-```xml
-
-
-
-
-
-
-
-
-```
-
-```css
-.wall { stroke: var(--text-primary); stroke-width: 6; fill: none; stroke-linecap: square; }
-.wall-thin { stroke: var(--text-primary); stroke-width: 3; fill: none; }
-.proposed-wall { stroke: #A32D2D; stroke-width: 4; fill: none; stroke-dasharray: 8 4; }
-```
-
-### Door Symbols
-
-```xml
-
-
-
-
-
-
-
-
-
-
-
-
-
-```
-
-```css
-.door { stroke: var(--text-secondary); stroke-width: 1.5; fill: none; }
-.door-swing { stroke: var(--text-tertiary); stroke-width: 1; fill: none; stroke-dasharray: 3 2; }
-```
-
-### Window Symbols
-
-```xml
-
-
-
-
-
-
-
-```
-
-```css
-.window { stroke: var(--text-primary); stroke-width: 1; fill: var(--bg-primary); }
-.window-glass { stroke: #378ADD; stroke-width: 2; fill: none; }
-```
-
-### Room Fills
-
-```xml
-
-
-
-
-
-
-
-
-
-```
-
-```css
-.room-master { fill: rgba(206, 203, 246, 0.3); } /* purple tint */
-.room-bed2 { fill: rgba(159, 225, 203, 0.3); } /* teal tint */
-.room-bed3 { fill: rgba(250, 199, 117, 0.3); } /* amber tint */
-.room-living { fill: rgba(245, 196, 179, 0.3); } /* coral tint */
-.room-kitchen { fill: rgba(237, 147, 177, 0.3); } /* pink tint */
-.room-bath { fill: rgba(133, 183, 235, 0.3); } /* blue tint */
-.room-new { fill: rgba(163, 45, 45, 0.15); } /* red tint for proposed */
-```
-
-### Support Fixtures
-
-```xml
-
-
-Counter
-
-
-
-```
-
-```css
-.balcony { fill: none; stroke: var(--text-secondary); stroke-width: 2; stroke-dasharray: 6 3; }
-.balcony-fill { fill: rgba(93, 202, 165, 0.1); }
-```
-
-### Room Labels
-
-```xml
-
-MASTER
-BEDROOM
-195 sq ft
-
-
-BEDROOM 4
-(NEW)
-```
-
-```css
-.room-label { font-family: system-ui; font-size: 11px; fill: var(--text-primary); font-weight: 500; }
-.area-label { font-family: system-ui; font-size: 9px; fill: var(--text-tertiary); }
-```
-
-### Circulation Arrow
-
-```xml
-
-
-
-
-
-
-
-New corridor access
-```
-
-```css
-.circulation { stroke: #3B6D11; stroke-width: 2; fill: none; }
-.circulation-fill { fill: #3B6D11; }
-```
-
-### North Arrow and Scale Bar
-
-```xml
-
-
-
-
- N
-
-
-
-
-
-
-
-
- 0
- 5'
- 10'
-
-```
-
-## Area Comparison Table
-
-### Table Structure
-
-```xml
-
-
-Room
-
-
-
-Master Bedroom
-195
-
-
-
-
-
-
-Bedroom 4 (NEW)
-+100
-
-
-
-TOTAL CARPET AREA
-```
-
-```css
-.table-header { fill: var(--bg-secondary); }
-.table-row { fill: var(--bg-primary); stroke: var(--border); stroke-width: 0.5; }
-.table-row-alt { fill: var(--bg-tertiary); stroke: var(--border); stroke-width: 0.5; }
-.table-highlight { fill: rgba(163, 45, 45, 0.1); stroke: #A32D2D; stroke-width: 0.5; }
-```
-
-## Layout Notes
-
-- **ViewBox**: 800×780 (portrait for floor plan + table)
-- **Scale**: 10px = 1 foot (apartment ~50ft × 33ft)
-- **Floor plan origin**: Offset at (50, 60) for margins
-- **Wall thickness**: 6px outer, 3px inner (represents ~6" walls)
-- **Room labels**: Centered in each room with area below
-- **Table placement**: Below floor plan with full width
-
-## Color Coding
-
-| Element | Color | Usage |
-|---------|-------|-------|
-| Proposed walls | Red (#A32D2D) dotted | New construction |
-| New room fill | Red 15% opacity | Bedroom 4 area |
-| Circulation | Green (#3B6D11) | New access path |
-| Window glass | Blue (#378ADD) | Glass indication |
-| Bedrooms | Purple/Teal/Amber tints | Room differentiation |
-| Wet areas | Blue tint | Bathrooms |
-| Living | Coral tint | Common areas |
-
-## When to Use This Pattern
-
-Use this diagram style for:
-- Apartment/house floor plans
-- Office layout planning
-- Renovation proposals showing before/after
-- Space planning with area calculations
-- Real estate marketing materials
-- Interior design presentations
-- Building permit documentation
diff --git a/optional-skills/creative/concept-diagrams/examples/automated-password-reset-flow.md b/optional-skills/creative/concept-diagrams/examples/automated-password-reset-flow.md
deleted file mode 100644
index 86cd1cc0782..00000000000
--- a/optional-skills/creative/concept-diagrams/examples/automated-password-reset-flow.md
+++ /dev/null
@@ -1,276 +0,0 @@
-# Automated Password Reset Flow
-
-A two-section flowchart tracing the full user journey for a web application password reset: the initial request phase (forgot password → email check → token generation) and the reset-form phase (link click → new password entry → token/password validation). Demonstrates multi-exit decision diamonds, a three-column branching layout, a loop-back path, and a cross-section separator arrow.
-
-## Key Patterns Used
-
-- **Three-column layout**: Left column (error/terminal branches at cx=115), center column (main happy path at cx=340), right column (expired-token branch at cx=552) — allows side branches to live at the same y-level as center nodes without overlap
-- **Decision diamonds with `<polygon>`**: Each decision uses a `<g>` wrapper containing a `<polygon>` and centered `<text>`; the diamond points are computed as `cx±hw, cy±hh` (hw=100, hh=28)
-- **Pill-shaped terminals**: Start and end nodes use `rx=22` on their `<rect>` to signal entry/exit points; all mid-flow process nodes use `rx=8`
-- **Three-branch decision paths**: Each diamond has a "Yes" branch (down, short `<line>`) and a "No" branch (`<path>` going horizontal then vertical to a side column)
-- **Loop-back path**: Mismatch error node loops back to the password-entry node via a routing corridor at x=215 — a 5-px gap between the left column (right edge x=210) and center column (left edge x=220); the path exits the bottom of the error node, drops below it, travels right to x=215, then goes up to the target node's center y, then right 5 px into the node's left edge
-- **Section separator**: A dashed horizontal `<line>` at y=452 splits the two phases; the connecting arrow crosses it with a faded label ("user receives email") to preserve flow continuity
-- **Italic annotation**: The exact UX copy for the generic message ("If that email exists…") is shown as a faded italic `ts` text block below the left-branch terminal node
-- **Legend row**: Five inline swatches (gray, purple, teal, red, amber diamond) at the bottom explain the color-to-role mapping
-
-## Diagram
-
-```xml
-
-```
-
-## Custom CSS
-
-Add these classes to the hosting page `
-
-
-
-
-
-
-
-
-
diff --git a/optional-skills/creative/kanban-video-orchestrator/SKILL.md b/optional-skills/creative/kanban-video-orchestrator/SKILL.md
index c5ac2a8c96e..f323406300b 100644
--- a/optional-skills/creative/kanban-video-orchestrator/SKILL.md
+++ b/optional-skills/creative/kanban-video-orchestrator/SKILL.md
@@ -8,7 +8,7 @@ platforms: [linux, macos, windows]
metadata:
hermes:
tags: [video, kanban, multi-agent, orchestration, production-pipeline]
- related_skills: [kanban-orchestrator, kanban-worker, ascii-video, manim-video, p5js, comfyui, touchdesigner-mcp, blender-mcp, pixel-art, ascii-art, songwriting-and-ai-music, heartmula, songsee, spotify, youtube-content, claude-design, excalidraw, architecture-diagram, concept-diagrams, baoyu-comic, baoyu-infographic, humanizer, gif-search, meme-generation]
+ related_skills: [kanban-orchestrator, kanban-worker, ascii-video, manim-video, p5js, comfyui, touchdesigner-mcp, blender-mcp, pixel-art, ascii-art, songwriting-and-ai-music, heartmula, songsee, spotify, youtube-content, claude-design, excalidraw, html-artifact, baoyu-comic, baoyu-infographic, humanizer, gif-search, meme-generation]
credits: |
The single-project workspace layout, profile-config patching pattern,
SOUL.md-per-profile model, TEAM.md task-graph convention, and
diff --git a/optional-skills/creative/kanban-video-orchestrator/references/intake.md b/optional-skills/creative/kanban-video-orchestrator/references/intake.md
index d290b606f49..1f817da020b 100644
--- a/optional-skills/creative/kanban-video-orchestrator/references/intake.md
+++ b/optional-skills/creative/kanban-video-orchestrator/references/intake.md
@@ -96,8 +96,7 @@ texture inside the final scene.
- **Terminal-only or with GUI?**
- **Voiceover for narration?**
- **Diagram support needed?** — Often these benefit from a diagram skill
- alongside the screen-capture/render step (`excalidraw`,
- `architecture-diagram`, `concept-diagrams`)
+ alongside the screen-capture/render step (`excalidraw`, `html-artifact`)
### ASCII / terminal art
diff --git a/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md b/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md
index 95eaeb33b66..c5e15c06f4b 100644
--- a/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md
+++ b/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md
@@ -59,7 +59,7 @@ local skills.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker` plus any project-specific design skill —
- `claude-design` (UI/web), `sketch` (quick mockup variants),
+ `claude-design` (UI/web), `html-artifact` (quick mockup variants, explainers, diagrams),
`popular-web-designs` (matching known web aesthetic), `pixel-art` (retro),
`ascii-art` (terminal/retro), `excalidraw` (hand-drawn frames),
`design-md` (text-based design docs)
@@ -72,8 +72,7 @@ film and music video. Often pairs with a diagramming tool.
- **Toolsets:** kanban, file
- **Skills:** `kanban-worker` plus a diagram skill — `excalidraw` (sketch),
- `architecture-diagram` (technical/system), `concept-diagrams` (educational/
- scientific)
+ `html-artifact` (technical/system + educational/scientific diagrams)
- **Outputs:** `storyboard.md` with one row per scene/shot, optional
storyboard sketches
diff --git a/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md b/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md
index b5e59c31478..2f27ffc41e7 100644
--- a/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md
+++ b/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md
@@ -30,10 +30,8 @@ called from the terminal toolset; they don't appear in `always_load`.
| `claude-design` | Design one-off HTML artifacts (landing, deck, prototype) | Concept artist for product video style frames; storyboarder for UI-heavy content |
| `design-md` | Design markdown docs | Concept artist documenting visual specs |
| `popular-web-designs` | Reference patterns for popular web designs | Concept artist; cinematographer when matching a known UI aesthetic |
-| `sketch` | Throwaway HTML mockups (2-3 design variants to compare) | Concept artist exploring directions; storyboarder for UI flows |
| `excalidraw` | Excalidraw-style hand-drawn diagrams | Storyboarder; concept artist for sketch-style frames |
-| `architecture-diagram` | Software architecture diagrams | Storyboarder for technical content; explainer scenes about systems |
-| `concept-diagrams` *(optional)* | Flat, minimal SVG diagrams (educational visual language; physics, chemistry, math, anatomy, etc.) | Renderer / storyboarder for explainer scenes with clean educational diagrams |
+| `html-artifact` | Self-contained HTML artifacts: throwaway mockup variants, explainers, dark-tech architecture + educational SVG diagrams | Concept artist exploring directions; storyboarder for UI flows + technical/educational explainer scenes |
| `pretext` | Mathematical/scientific content authoring | Writer / cinematographer for technical-explainer pretexts |
| `creative-ideation` | Constraint-driven project ideation | Director / cinematographer when the brief is wide-open and needs framing |
| `humanizer` | Strip AI-isms from text, add real voice | Writer / copywriter post-process to avoid AI-tells in scripts and VO copy |
diff --git a/skills/creative/architecture-diagram/SKILL.md b/skills/creative/architecture-diagram/SKILL.md
deleted file mode 100644
index 2c813c53c13..00000000000
--- a/skills/creative/architecture-diagram/SKILL.md
+++ /dev/null
@@ -1,148 +0,0 @@
----
-name: architecture-diagram
-description: "Dark-themed SVG architecture/cloud/infra diagrams as HTML."
-version: 1.0.0
-author: Cocoon AI (hello@cocoon-ai.com), ported by Hermes Agent
-license: MIT
-dependencies: []
-platforms: [linux, macos, windows]
-metadata:
- hermes:
- tags: [architecture, diagrams, SVG, HTML, visualization, infrastructure, cloud]
- related_skills: [concept-diagrams, excalidraw]
----
-
-# Architecture Diagram Skill
-
-Generate professional, dark-themed technical architecture diagrams as standalone HTML files with inline SVG graphics. No external tools, no API keys, no rendering libraries — just write the HTML file and open it in a browser.
-
-## Scope
-
-**Best suited for:**
-- Software system architecture (frontend / backend / database layers)
-- Cloud infrastructure (VPC, regions, subnets, managed services)
-- Microservice / service-mesh topology
-- Database + API map, deployment diagrams
-- Anything with a tech-infra subject that fits a dark, grid-backed aesthetic
-
-**Look elsewhere first for:**
-- Physics, chemistry, math, biology, or other scientific subjects
-- Physical objects (vehicles, hardware, anatomy, cross-sections)
-- Floor plans, narrative journeys, educational / textbook-style visuals
-- Hand-drawn whiteboard sketches (consider `excalidraw`)
-- Animated explainers (consider an animation skill)
-
-If a more specialized skill is available for the subject, prefer that. If none fits, this skill can also serve as a general SVG diagram fallback — the output will just carry the dark tech aesthetic described below.
-
-Based on [Cocoon AI's architecture-diagram-generator](https://github.com/Cocoon-AI/architecture-diagram-generator) (MIT).
-
-## Workflow
-
-1. User describes their system architecture (components, connections, technologies)
-2. Generate the HTML file following the design system below
-3. Save with `write_file` to a `.html` file (e.g. `~/architecture-diagram.html`)
-4. User opens in any browser — works offline, no dependencies
-
-### Output Location
-
-Save diagrams to a user-specified path, or default to the current working directory:
-```
-./[project-name]-architecture.html
-```
-
-### Preview
-
-After saving, suggest the user open it:
-```bash
-# macOS
-open ./my-architecture.html
-# Linux
-xdg-open ./my-architecture.html
-```
-
-## Design System & Visual Language
-
-### Color Palette (Semantic Mapping)
-
-Use specific `rgba` fills and hex strokes to categorize components:
-
-| Component Type | Fill (rgba) | Stroke (Hex) |
-| :--- | :--- | :--- |
-| **Frontend** | `rgba(8, 51, 68, 0.4)` | `#22d3ee` (cyan-400) |
-| **Backend** | `rgba(6, 78, 59, 0.4)` | `#34d399` (emerald-400) |
-| **Database** | `rgba(76, 29, 149, 0.4)` | `#a78bfa` (violet-400) |
-| **AWS/Cloud** | `rgba(120, 53, 15, 0.3)` | `#fbbf24` (amber-400) |
-| **Security** | `rgba(136, 19, 55, 0.4)` | `#fb7185` (rose-400) |
-| **Message Bus** | `rgba(251, 146, 60, 0.3)` | `#fb923c` (orange-400) |
-| **External** | `rgba(30, 41, 59, 0.5)` | `#94a3b8` (slate-400) |
-
-### Typography & Background
-- **Font:** JetBrains Mono (Monospace), loaded from Google Fonts
-- **Sizes:** 12px (Names), 9px (Sublabels), 8px (Annotations), 7px (Tiny labels)
-- **Background:** Slate-950 (`#020617`) with a subtle 40px grid pattern
-
-```svg
-
-
-
-
-```
-
-## Technical Implementation Details
-
-### Component Rendering
-Components are rounded rectangles (`rx="6"`) with 1.5px strokes. To prevent arrows from showing through semi-transparent fills, use a **double-rect masking technique**:
-1. Draw an opaque background rect (`#0f172a`)
-2. Draw the semi-transparent styled rect on top
-
-### Connection Rules
-- **Z-Order:** Draw arrows *early* in the SVG (after the grid) so they render behind component boxes
-- **Arrowheads:** Defined via SVG markers
-- **Security Flows:** Use dashed lines in rose color (`#fb7185`)
-- **Boundaries:**
- - *Security Groups:* Dashed (`4,4`), rose color
- - *Regions:* Large dashed (`8,4`), amber color, `rx="12"`
-
-### Spacing & Layout Logic
-- **Standard Height:** 60px (Services); 80-120px (Large components)
-- **Vertical Gap:** Minimum 40px between components
-- **Message Buses:** Must be placed *in the gap* between services, not overlapping them
-- **Legend Placement:** **CRITICAL.** Must be placed outside all boundary boxes. Calculate the lowest Y-coordinate of all boundaries and place the legend at least 20px below it.
-
-## Document Structure
-
-The generated HTML file follows a four-part layout:
-1. **Header:** Title with a pulsing dot indicator and subtitle
-2. **Main SVG:** The diagram contained within a rounded border card
-3. **Summary Cards:** A grid of three cards below the diagram for high-level details
-4. **Footer:** Minimal metadata
-
-### Info Card Pattern
-```html
-
-
-
-
Title
-
-
-
• Item one
-
• Item two
-
-
-```
-
-## Output Requirements
-- **Single File:** One self-contained `.html` file
-- **No External Dependencies:** All CSS and SVG must be inline (except Google Fonts)
-- **No JavaScript:** Use pure CSS for any animations (like pulsing dots)
-- **Compatibility:** Must render correctly in any modern web browser
-
-## Template Reference
-
-Load the full HTML template for the exact structure, CSS, and SVG component examples:
-
-```
-skill_view(name="architecture-diagram", file_path="templates/template.html")
-```
-
-The template contains working examples of every component type (frontend, backend, database, cloud, security), arrow styles (standard, dashed, curved), security groups, region boundaries, and the legend — use it as your structural reference when generating diagrams.
diff --git a/skills/creative/architecture-diagram/templates/template.html b/skills/creative/architecture-diagram/templates/template.html
deleted file mode 100644
index f5b32fbe7fd..00000000000
--- a/skills/creative/architecture-diagram/templates/template.html
+++ /dev/null
@@ -1,319 +0,0 @@
-
-
-
-
-
- [PROJECT NAME] Architecture Diagram
-
-
-
-
-
-
-
-
-
-
[PROJECT NAME] Architecture
-
-
[Subtitle description]
-
-
-
-
-
-
-
-
-
-
-
-
-
Card Title 1
-
-
-
• Item one
-
• Item two
-
• Item three
-
• Item four
-
-
-
-
-
-
-
Card Title 2
-
-
-
• Item one
-
• Item two
-
• Item three
-
• Item four
-
-
-
-
-
-
-
Card Title 3
-
-
-
• Item one
-
• Item two
-
• Item three
-
• Item four
-
-
-
-
-
-
- [Project Name] • [Additional metadata]
-
-
-
-
diff --git a/skills/creative/claude-design/SKILL.md b/skills/creative/claude-design/SKILL.md
index 673d1ff827a..d61dbcb2f00 100644
--- a/skills/creative/claude-design/SKILL.md
+++ b/skills/creative/claude-design/SKILL.md
@@ -8,7 +8,7 @@ platforms: [linux, macos, windows]
metadata:
hermes:
tags: [design, html, prototype, ux, ui, creative, artifact, deck, motion, design-system]
- related_skills: [design-md, popular-web-designs, excalidraw, architecture-diagram]
+ related_skills: [html-artifact, design-md, popular-web-designs, excalidraw]
---
# Claude Design for CLI/API Agents
@@ -19,19 +19,21 @@ The goal is to preserve Claude Design's useful design behavior and taste while r
**Before starting, check for other web-design skills like `popular-web-designs` (ready-to-paste design systems for Stripe, Linear, Vercel, Notion, etc.) and `design-md` (Google's DESIGN.md token spec format).** If the user wants a known brand's look, load `popular-web-designs` alongside this one and let it supply the visual vocabulary. If the deliverable is a token spec file rather than a rendered artifact, use `design-md` instead. Full decision table below.
-## When To Use This Skill vs `popular-web-designs` vs `design-md`
+## When To Use This Skill vs `html-artifact` vs `popular-web-designs` vs `design-md`
-Hermes has three design-related skills under `skills/creative/`. They do different jobs — load the right one (or combine them):
+Hermes has four design-related skills under `skills/creative/`. They do different jobs — load the right one (or combine them):
| Skill | What it gives you | Use when the user wants... |
|---|---|---|
-| **claude-design** (this one) | Design *process and taste* — how to scope a brief, gather context, produce variants, verify a local HTML artifact, avoid AI-design slop | a from-scratch designed artifact (landing page, prototype, deck, component lab, motion study) with no specific brand or token system dictated |
+| **claude-design** (this one) | Visual design *process and taste* — how to scope a brief, gather context, produce variants, verify a local HTML artifact, avoid AI-design slop | a from-scratch *designed* artifact (landing page, prototype, deck, component lab, motion study) where the look itself is the point and no specific brand or token system is dictated |
+| **html-artifact** | A house style for *information* artifacts — explainers, plans, reports, code reviews, technical/educational diagrams, throwaway editors | to *explain / plan / report / diagram / review* something as a shareable HTML page — the content is the point, not bespoke visual design |
| **popular-web-designs** | 54 ready-to-paste design systems — exact colors, typography, components, CSS values for sites like Stripe, Linear, Vercel, Notion, Airbnb | "make it look like Stripe / Linear / Vercel", a page styled after a known brand, or a visual starting point pulled from a real product |
| **design-md** | Google's DESIGN.md spec format — author/validate/diff/export design-token files, WCAG contrast checking, Tailwind/DTCG export | a formal, persistent, machine-readable design-system *spec file* (tokens + rationale) that lives in a repo and gets consumed by agents over time |
Rule of thumb:
-- **Process + taste, one-off artifact** → claude-design
+- **Bespoke visual design, taste-driven artifact** → claude-design
+- **Explain / plan / report / diagram as a shareable page** → html-artifact
- **Match a known brand's look** → popular-web-designs (and let claude-design drive the process)
- **Author the tokens spec itself** → design-md
diff --git a/skills/creative/design-md/SKILL.md b/skills/creative/design-md/SKILL.md
index 6604be1979d..e0534d9ba72 100644
--- a/skills/creative/design-md/SKILL.md
+++ b/skills/creative/design-md/SKILL.md
@@ -8,7 +8,7 @@ platforms: [linux, macos, windows]
metadata:
hermes:
tags: [design, design-system, tokens, ui, accessibility, wcag, tailwind, dtcg, google]
- related_skills: [popular-web-designs, claude-design, excalidraw, architecture-diagram]
+ related_skills: [popular-web-designs, claude-design, excalidraw, html-artifact]
---
# DESIGN.md Skill
diff --git a/skills/creative/html-artifact/SKILL.md b/skills/creative/html-artifact/SKILL.md
new file mode 100644
index 00000000000..4883e1ff4c1
--- /dev/null
+++ b/skills/creative/html-artifact/SKILL.md
@@ -0,0 +1,184 @@
+---
+name: html-artifact
+description: Build self-contained HTML files to explain, plan, or review.
+version: 1.0.0
+author: Anthropic (html-effectiveness gallery, MIT), adapted for Hermes Agent
+license: MIT
+platforms: [linux, macos, windows]
+metadata:
+ hermes:
+ tags: [html, artifact, explainer, plan, report, code-review, diagram, svg, design, prototype, editor]
+ related_skills: [claude-design, popular-web-designs, design-md, excalidraw, p5js]
+---
+
+# HTML Artifact Skill
+
+Produce a single self-contained `.html` file — no build step, no dependencies, no
+CDN — whenever the deliverable is something a human should *read, share, or poke at*:
+a concept explainer, an implementation plan, a status/incident report, a code-review
+walkthrough, a technical or educational diagram, a set of design variants, or a
+throwaway editor that exports its result back to you.
+
+HTML beats Markdown once a doc has color, layout, diagrams, tables, code, or
+interaction. It opens in any browser, shares as a link, stays readable past 100
+lines, and can carry SVG diagrams and live controls Markdown can't. Default to an
+HTML artifact when the user says "make an HTML file/artifact", or asks you to
+*explain how X works*, *write up a plan/PR/report*, *diagram* something, *compare*
+options, or *prototype* an interaction — even when they don't say "HTML".
+
+## Why this skill exists (and what it replaced)
+
+This skill **supersedes** three former skills — `sketch` (throwaway multi-variant
+HTML mockups), `architecture-diagram` (dark-tech infra SVG), and `concept-diagrams`
+(educational SVG). They were consolidated for a concrete reason: all three emitted
+the *same artifact* — a single self-contained HTML file with inline CSS/SVG — and
+overlapped heavily (three "diagram" skills, two "compare variants" paths, no shared
+token system). Folding them into one mode-switched skill removes the
+which-one-do-I-load ambiguity and gives every output the same house style, while
+keeping each skill's unique value: the fidelity dial + verify loop (from `sketch`),
+the dark infra aesthetic (from `architecture-diagram`), and the 9-ramp educational
+system + archetype library (from `concept-diagrams`).
+
+The consolidation is footprint-safe: this skill has **zero dependencies** (no Node,
+FFmpeg, Chromium, or pip packages — it authors plain HTML/CSS/SVG), so even though it
+ships **bundled** (active by default) where `concept-diagrams` was optional, the only
+always-in-context cost is this skill's one-line description. All references,
+templates, and the example gallery load on demand. `concept-diagrams` was optional
+because it was niche, not because it had an install cost — so a general-purpose,
+zero-dep bundled skill is the right home for that capability. Diagram-style
+work with a *real* install cost (e.g. `hyperframes`: Node + FFmpeg + Chromium)
+deliberately stays optional and is **not** folded in here.
+
+Use a different skill when the job is: matching a known brand's look → `popular-web-designs`;
+authoring a formal design-token spec file → `design-md`; building a *bespoke visually-designed*
+artifact where the look itself is the point → `claude-design`; producing hand-drawn/whiteboard
+`.excalidraw` files → `excalidraw`; making generative/animated canvas art → `p5js`. This
+skill is for everything else that ships as a readable, shareable HTML page.
+
+## Reference files (load on demand)
+
+- `references/house-style.md` — the canonical `:root` token block, type system,
+ card/table/callout/code-block patterns. **Read this before authoring any artifact.**
+- `references/examples.md` — 20 complete reference HTML files (Anthropic's
+ html-effectiveness gallery, MIT) keyed to each mode, plus the script to fetch them.
+ Read/fetch one that matches your task to calibrate the house style from a full example.
+- `references/svg-diagrams.md` — hand-authored inline SVG: arrow markers, node
+ groups, decision diamonds, edge semantics, coordinate-grid discipline. Read for
+ any flowchart / architecture / concept diagram.
+- `references/concept-archetypes.md` — the 9-ramp educational color system + a
+ library of diagram archetypes (timeline, tree, quadrant, layered stack,
+ before/after, hub-spoke, cross-section). Read for educational / non-software visuals.
+- `references/dark-tech.md` — the dark "infra" token variant (carries the old
+ architecture-diagram aesthetic). Read for cloud/infra/system architecture diagrams.
+- `references/throwaway-editors.md` — the single-file editor recipe and the
+ copy-to-clipboard export pattern that survives `file://`. Read when the artifact
+ needs interactive controls that export state back to a prompt.
+- `references/fidelity-and-verify.md` — the throwaway↔presentation fidelity dial,
+ the multi-variant comparison layout, and the mandatory browser-vision verify loop.
+
+## Templates
+
+- `templates/base.html` — document scaffold with the house-style `
+
+
+
+
Section · Context
+
Artifact Title
+
One-sentence framing of what this artifact is and who it's for.
+
+
Overview
+
Body copy. Keep paragraphs readable; let layout carry structure.
+
+
+
Metric
42
+
Metric
7
+
Needs attention
3
+
Metric
98%
+
+
+
Note. Use callouts for the one thing the reader must not miss.